Browse Source

Merge pull request #90 from godlikepanos/mediump

Add relaxed precision support
Panagiotis Christopoulos Charitos 4 years ago
parent
commit
68261a78cf
48 changed files with 538 additions and 497 deletions
  1. 1 1
      AnKi/Gr/Vulkan/GpuMemoryManager.cpp
  2. 1 1
      AnKi/Renderer/DownscaleBlur.cpp
  3. 29 34
      AnKi/Renderer/VolumetricFog.cpp
  4. 0 2
      AnKi/Renderer/VolumetricFog.h
  5. 1 1
      AnKi/Resource/ShaderProgramResource.cpp
  6. 27 1
      AnKi/ShaderCompiler/MaliOfflineCompiler.cpp
  7. 4 1
      AnKi/ShaderCompiler/ShaderProgramParser.cpp
  8. 12 10
      AnKi/Shaders/ApplyIrradianceToReflection.ankiprog
  9. 7 8
      AnKi/Shaders/Bloom.ankiprog
  10. 24 27
      AnKi/Shaders/BloomUpscale.ankiprog
  11. 5 4
      AnKi/Shaders/ClusteredShadingCommon.glsl
  12. 7 1
      AnKi/Shaders/Common.glsl
  13. 10 11
      AnKi/Shaders/DownscaleBlur.glsl
  14. 0 0
      AnKi/Shaders/DownscaleBlurRaster.ankiprog
  15. 15 15
      AnKi/Shaders/FinalComposite.ankiprog
  16. 11 11
      AnKi/Shaders/ForwardShadingCommonFrag.glsl
  17. 0 5
      AnKi/Shaders/ForwardShadingCommonVert.glsl
  18. 3 3
      AnKi/Shaders/ForwardShadingFog.ankiprog
  19. 10 10
      AnKi/Shaders/ForwardShadingParticles.ankiprog
  20. 2 2
      AnKi/Shaders/Fsr.ankiprog
  21. 10 10
      AnKi/Shaders/GBufferCommon.glsl
  22. 30 30
      AnKi/Shaders/GBufferGeneric.ankiprog
  23. 9 9
      AnKi/Shaders/GBufferGpuParticles.ankiprog
  24. 15 15
      AnKi/Shaders/GBufferPost.ankiprog
  25. 24 24
      AnKi/Shaders/Include/ClusteredShadingTypes.h
  26. 2 0
      AnKi/Shaders/Include/Common.h
  27. 4 4
      AnKi/Shaders/Include/IndirectDiffuseTypes.h
  28. 1 1
      AnKi/Shaders/Include/LensFlareTypes.h
  29. 17 17
      AnKi/Shaders/IndirectDiffuse.ankiprog
  30. 9 8
      AnKi/Shaders/IndirectDiffuseDenoise.ankiprog
  31. 20 19
      AnKi/Shaders/IrradianceDice.ankiprog
  32. 5 10
      AnKi/Shaders/LensFlareSprite.ankiprog
  33. 76 58
      AnKi/Shaders/LightFunctions.glsl
  34. 25 22
      AnKi/Shaders/LightShading.ankiprog
  35. 4 4
      AnKi/Shaders/LightShadingApplyFog.ankiprog
  36. 9 5
      AnKi/Shaders/LightShadingApplyIndirect.ankiprog
  37. 5 6
      AnKi/Shaders/MotionBlur.glsl
  38. 32 39
      AnKi/Shaders/PackFunctions.glsl
  39. 1 1
      AnKi/Shaders/RtShadowsDenoise.ankiprog
  40. 1 1
      AnKi/Shaders/RtShadowsRayGen.ankiprog
  41. 7 7
      AnKi/Shaders/ShadowmapsResolve.ankiprog
  42. 5 4
      AnKi/Shaders/Ssr.ankiprog
  43. 1 1
      AnKi/Shaders/TonemappingAverageLuminance.ankiprog
  44. 24 24
      AnKi/Shaders/TonemappingFunctions.glsl
  45. 5 3
      AnKi/Shaders/TraditionalDeferredShading.ankiprog
  46. 26 25
      AnKi/Shaders/VolumetricFogAccumulation.ankiprog
  47. 1 1
      Tools/Android/app/build.gradle
  48. 1 1
      Tools/Android/build.gradle

+ 1 - 1
AnKi/Gr/Vulkan/GpuMemoryManager.cpp

@@ -34,7 +34,7 @@ Error GpuMemoryManagerInterface::allocateChunk(U32 classIdx, GpuMemoryManagerChu
 	}
 
 	VkDeviceMemory memHandle;
-	if(ANKI_UNLIKELY(vkAllocateMemory(m_parent->m_dev, &ci, nullptr, &memHandle) < 0))
+	if(ANKI_UNLIKELY(vkAllocateMemory(m_parent->m_dev, &ci, nullptr, &memHandle) != VK_SUCCESS))
 	{
 		ANKI_VK_LOGF("Out of GPU memory. Mem type index %u, size %zu", m_memTypeIdx,
 					 m_classInfos[classIdx].m_suballocationSize);

+ 1 - 1
AnKi/Renderer/DownscaleBlur.cpp

@@ -74,7 +74,7 @@ Error DownscaleBlur::initInternal()
 	}
 	else
 	{
-		ANKI_CHECK(getResourceManager().loadResource("Shaders/DownscaleBlur.ankiprog", m_prog));
+		ANKI_CHECK(getResourceManager().loadResource("Shaders/DownscaleBlurRaster.ankiprog", m_prog));
 		m_prog->getOrCreateVariant(variant);
 	}
 	m_grProg = variant->getProgram();

+ 29 - 34
AnKi/Renderer/VolumetricFog.cpp

@@ -49,53 +49,48 @@ Error VolumetricFog::init()
 	return Error::NONE;
 }
 
-void VolumetricFog::run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
+void VolumetricFog::populateRenderGraph(RenderingContext& ctx)
 {
-	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
+	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
-	cmdb->bindShaderProgram(m_grProg);
+	m_runCtx.m_rt = rgraph.newRenderTarget(m_rtDescr);
 
-	cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
-	rgraphCtx.bindColorTexture(0, 1, m_r->getVolumetricLightingAccumulation().getRt());
+	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Vol fog");
 
-	rgraphCtx.bindImage(0, 2, m_runCtx.m_rt, TextureSubresourceInfo());
+	pass.newDependency({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE});
+	pass.newDependency({m_r->getVolumetricLightingAccumulation().getRt(), TextureUsageBit::SAMPLED_COMPUTE});
 
-	struct PushConsts
-	{
-		F32 m_fogScatteringCoeff;
-		F32 m_fogAbsorptionCoeff;
-		F32 m_density;
-		F32 m_near;
-		Vec3 m_fogDiffuse;
-		F32 m_far;
-	} regs;
+	pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) -> void {
+		CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
-	regs.m_fogScatteringCoeff = m_fogScatteringCoeff;
-	regs.m_fogAbsorptionCoeff = m_fogAbsorptionCoeff;
-	regs.m_density = m_fogDensity;
-	regs.m_fogDiffuse = m_fogDiffuseColor;
-	regs.m_near = ctx.m_renderQueue->m_cameraNear;
-	regs.m_far = ctx.m_renderQueue->m_cameraFar;
+		cmdb->bindShaderProgram(m_grProg);
 
-	cmdb->setPushConstants(&regs, sizeof(regs));
+		cmdb->bindSampler(0, 0, m_r->getSamplers().m_trilinearClamp);
+		rgraphCtx.bindColorTexture(0, 1, m_r->getVolumetricLightingAccumulation().getRt());
 
-	dispatchPPCompute(cmdb, m_workgroupSize[0], m_workgroupSize[1], m_volumeSize[0], m_volumeSize[1]);
-}
+		rgraphCtx.bindImage(0, 2, m_runCtx.m_rt, TextureSubresourceInfo());
 
-void VolumetricFog::populateRenderGraph(RenderingContext& ctx)
-{
-	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
+		struct PushConsts
+		{
+			F32 m_fogScatteringCoeff;
+			F32 m_fogAbsorptionCoeff;
+			F32 m_density;
+			F32 m_near;
+			Vec3 m_fogDiffuse;
+			F32 m_far;
+		} regs;
 
-	m_runCtx.m_rt = rgraph.newRenderTarget(m_rtDescr);
+		regs.m_fogScatteringCoeff = m_fogScatteringCoeff;
+		regs.m_fogAbsorptionCoeff = m_fogAbsorptionCoeff;
+		regs.m_density = m_fogDensity;
+		regs.m_fogDiffuse = m_fogDiffuseColor;
+		regs.m_near = ctx.m_renderQueue->m_cameraNear;
+		regs.m_far = ctx.m_renderQueue->m_cameraFar;
 
-	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("Vol fog");
+		cmdb->setPushConstants(&regs, sizeof(regs));
 
-	pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) -> void {
-		run(ctx, rgraphCtx);
+		dispatchPPCompute(cmdb, m_workgroupSize[0], m_workgroupSize[1], m_volumeSize[0], m_volumeSize[1]);
 	});
-
-	pass.newDependency({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE});
-	pass.newDependency({m_r->getVolumetricLightingAccumulation().getRt(), TextureUsageBit::SAMPLED_COMPUTE});
 }
 
 } // end namespace anki

+ 0 - 2
AnKi/Renderer/VolumetricFog.h

@@ -87,8 +87,6 @@ private:
 	public:
 		RenderTargetHandle m_rt;
 	} m_runCtx; ///< Runtime context.
-
-	void run(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 };
 /// @}
 

+ 1 - 1
AnKi/Resource/ShaderProgramResource.cpp

@@ -387,7 +387,7 @@ void ShaderProgramResource::initVariant(const ShaderProgramResourceVariantInitIn
 			inf.m_constValues.setArray((constValueCount) ? constValues.getBegin() : nullptr, constValueCount);
 			ShaderPtr shader = getManager().getGrManager().newShader(inf);
 
-			if(false)
+			if(false && (ANKI_OS_LINUX || ANKI_OS_WINDOWS))
 			{
 				MaliOfflineCompilerOut maliocOut;
 				const Error err =

+ 27 - 1
AnKi/ShaderCompiler/MaliOfflineCompiler.cpp

@@ -192,7 +192,7 @@ static Error runMaliOfflineCompilerInternal(CString maliocExecutable, CString sp
 			return Error::FUNCTION_FAILED;
 		}
 	}
-	else
+	else if(shaderType == ShaderType::FRAGMENT)
 	{
 		if(std::regex_search(stdoutstl, match,
 							 std::regex("Total instruction cycles:\\s*" ANKI_FLOAT_REGEX "\\s*" ANKI_FLOAT_REGEX
@@ -217,6 +217,32 @@ static Error runMaliOfflineCompilerInternal(CString maliocExecutable, CString sp
 			return Error::FUNCTION_FAILED;
 		}
 	}
+	else
+	{
+		ANKI_ASSERT(shaderType == ShaderType::COMPUTE);
+
+		if(std::regex_search(stdoutstl, match,
+							 std::regex("Total instruction cycles:\\s*" ANKI_FLOAT_REGEX "\\s*" ANKI_FLOAT_REGEX
+										"\\s*" ANKI_FLOAT_REGEX "\\s*" ANKI_FLOAT_REGEX "\\s*" ANKI_FLOAT_REGEX
+										"\\s*([A-Z]+)")))
+		{
+			ANKI_ASSERT(match.size() == 7);
+
+			U32 count = 1;
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_fma));
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_cvt));
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_sfu));
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_loadStore));
+			ANKI_CHECK(CString(match[count++].str().c_str()).toNumber(out.m_texture));
+
+			out.m_boundUnit = strToHwUnit(match[count++].str().c_str());
+		}
+		else
+		{
+			ANKI_SHADER_COMPILER_LOGE("Error parsing instruction cycles");
+			return Error::FUNCTION_FAILED;
+		}
+	}
 
 #undef ANKI_FLOAT_REGEX
 

+ 4 - 1
AnKi/ShaderCompiler/ShaderProgramParser.cpp

@@ -19,7 +19,7 @@ static const Array<CString, U32(ShaderType::COUNT)> SHADER_STAGE_NAMES = {
 	{"VERTEX", "TESSELLATION_CONTROL", "TESSELLATION_EVALUATION", "GEOMETRY", "FRAGMENT", "COMPUTE", "RAY_GEN",
 	 "ANY_HIT", "CLOSEST_HIT", "MISS", "INTERSECTION", "CALLABLE"}};
 
-static const char* SHADER_HEADER = R"(#version 460 core
+static const char SHADER_HEADER[] = R"(#version 460 core
 #define ANKI_%s_SHADER 1
 #define ANKI_OS_ANDROID %d
 #define ANKI_OS_WINDOWS %d
@@ -252,6 +252,9 @@ static const char* SHADER_HEADER = R"(#version 460 core
 
 layout(std140, row_major) uniform;
 layout(std140, row_major) buffer;
+
+#define ANKI_RP mediump
+#define ANKI_FP highp
 )";
 
 static const U64 SHADER_HEADER_HASH = computeHash(SHADER_HEADER, sizeof(SHADER_HEADER));

+ 12 - 10
AnKi/Shaders/ApplyIrradianceToReflection.ankiprog

@@ -12,12 +12,12 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 6) in;
 layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
 layout(set = 0, binding = 1) uniform texture2D u_gbufferTex[3u];
 
-layout(set = 0, binding = 2) buffer readonly ssbo_
+layout(set = 0, binding = 2) buffer readonly b_ssbo
 {
-	Vec4 u_irradianceDice[6u];
+	ANKI_RP Vec4 u_irradianceDice[6u];
 };
 
-layout(set = 0, binding = 3, r11f_g11f_b10f) uniform imageCube u_cubeTex;
+layout(set = 0, binding = 3, r11f_g11f_b10f) uniform ANKI_RP imageCube u_cubeTex;
 
 void main()
 {
@@ -34,20 +34,22 @@ void main()
 
 	// Read the gbuffer
 	GbufferInfo gbuffer;
-	readGBuffer(u_gbufferTex[0u], u_gbufferTex[1u], u_gbufferTex[2u], u_nearestAnyClampSampler, sampleUv, 0.0, gbuffer);
+	unpackGBufferNoVelocity(textureLod(u_gbufferTex[0u], u_nearestAnyClampSampler, sampleUv, 0.0),
+							textureLod(u_gbufferTex[1u], u_nearestAnyClampSampler, sampleUv, 0.0),
+							textureLod(u_gbufferTex[2u], u_nearestAnyClampSampler, sampleUv, 0.0), gbuffer);
 
 	// Sample
-	const Vec3 irradiance = sampleAmbientDice(u_irradianceDice[0u].xyz, u_irradianceDice[1u].xyz,
-											  u_irradianceDice[2u].xyz, u_irradianceDice[3u].xyz,
-											  u_irradianceDice[4u].xyz, u_irradianceDice[5u].xyz, gbuffer.m_normal);
+	const ANKI_RP Vec3 irradiance = sampleAmbientDice(
+		u_irradianceDice[0u].xyz, u_irradianceDice[1u].xyz, u_irradianceDice[2u].xyz, u_irradianceDice[3u].xyz,
+		u_irradianceDice[4u].xyz, u_irradianceDice[5u].xyz, gbuffer.m_normal);
 
 	// Compute the indirect term
-	const Vec3 indirect = gbuffer.m_diffuse * irradiance;
+	const ANKI_RP Vec3 indirect = gbuffer.m_diffuse * irradiance;
 
 	// Read the prev color and apply indirect
 	const IVec3 coords = IVec3(globalInvocationID, faceIdx);
-	const Vec3 prevColor = imageLoad(u_cubeTex, coords).xyz;
-	const Vec3 prevColorWithIndirectDiffuse = prevColor + gbuffer.m_diffuse * indirect;
+	const ANKI_RP Vec3 prevColor = imageLoad(u_cubeTex, coords).xyz;
+	const ANKI_RP Vec3 prevColorWithIndirectDiffuse = prevColor + gbuffer.m_diffuse * indirect;
 
 	// Barrier just in case
 	memoryBarrierImage();

+ 7 - 8
AnKi/Shaders/Bloom.ankiprog

@@ -14,7 +14,7 @@ layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_s
 
 // Vars
 layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture2D u_tex; ///< Its the IS RT
+layout(set = 0, binding = 1) uniform ANKI_RP texture2D u_tex; ///< It's the IS RT
 
 layout(push_constant) uniform pc_
 {
@@ -40,13 +40,12 @@ void main()
 
 	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
 
-	Vec3 color = textureLod(u_tex, u_linearAnyClampSampler, uv, 0.0).rgb;
-	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(+1, +1)).rgb;
-	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(-1, -1)).rgb;
-	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(-1, +1)).rgb;
-	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(+1, -1)).rgb;
-
-	color *= (1.0 / 5.0);
+	ANKI_RP F32 weight = 1.0 / 5.0;
+	ANKI_RP Vec3 color = textureLod(u_tex, u_linearAnyClampSampler, uv, 0.0).rgb * weight;
+	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(+1, +1)).rgb * weight;
+	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(-1, -1)).rgb * weight;
+	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(-1, +1)).rgb * weight;
+	color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, IVec2(+1, -1)).rgb * weight;
 
 	color = tonemap(color, u_averageLuminancePad3.x, u_thresholdScalePad2.x) * u_thresholdScalePad2.y;
 

+ 24 - 27
AnKi/Shaders/BloomUpscale.ankiprog

@@ -8,13 +8,13 @@ ANKI_SPECIALIZATION_CONSTANT_UVEC2(INPUT_TEX_SIZE, 2u);
 const UVec2 WORKGROUP_SIZE = UVec2(16u, 16u);
 
 #pragma anki start comp
-#include <AnKi/Shaders/Common.glsl>
+#include <AnKi/Shaders/Functions.glsl>
 
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
 
 layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture2D u_tex;
-layout(set = 0, binding = 2) uniform texture2D u_lensDirtTex;
+layout(set = 0, binding = 1) uniform ANKI_RP texture2D u_tex;
+layout(set = 0, binding = 2) uniform ANKI_RP texture2D u_lensDirtTex;
 
 layout(set = 0, binding = 3) writeonly uniform image2D out_img;
 
@@ -27,20 +27,20 @@ const F32 CHROMATIC_DISTORTION = 3.0;
 #define ENABLE_HALO 1
 const F32 HALO_OPACITY = 0.5;
 
-Vec3 textureDistorted(texture2D tex, sampler sampl, Vec2 uv,
-					  Vec2 direction, // direction of DISTORTION
-					  Vec3 DISTORTION) // per-channel DISTORTION factor
+ANKI_RP Vec3 textureDistorted(ANKI_RP texture2D tex, sampler sampl, Vec2 uv,
+							  Vec2 direction, // Direction along which the UVs are displaced
+							  Vec3 distortion) // Per-channel (r, g, b) displacement factor
 {
 #if ENABLE_CHROMATIC_DISTORTION
-	return Vec3(textureLod(tex, sampl, uv + direction * DISTORTION.r, 0.0).r,
-				textureLod(tex, sampl, uv + direction * DISTORTION.g, 0.0).g,
-				textureLod(tex, sampl, uv + direction * DISTORTION.b, 0.0).b);
+	return Vec3(textureLod(tex, sampl, uv + direction * distortion.r, 0.0).r,
+				textureLod(tex, sampl, uv + direction * distortion.g, 0.0).g,
+				textureLod(tex, sampl, uv + direction * distortion.b, 0.0).b);
 #else
-	return textureLod(tex, uv, 0.0).rgb;
+	return textureLod(tex, sampl, uv, 0.0).rgb; // tex is a separate texture object, the sampler must be passed too
 #endif
 }
 
-Vec3 ssLensFlare(Vec2 uv)
+ANKI_RP Vec3 ssLensFlare(Vec2 uv)
 {
 	const Vec2 TEXEL_SIZE = 1.0 / Vec2(INPUT_TEX_SIZE);
 	const Vec3 DISTORTION = Vec3(-TEXEL_SIZE.x * CHROMATIC_DISTORTION, 0.0, TEXEL_SIZE.x * CHROMATIC_DISTORTION);
@@ -51,14 +51,14 @@ Vec3 ssLensFlare(Vec2 uv)
 	const Vec2 ghostVec = (Vec2(0.5) - flipUv) * GHOST_DISPERSAL;
 
 	const Vec2 direction = normalize(ghostVec);
-	Vec3 result = Vec3(0.0);
+	ANKI_RP Vec3 result = Vec3(0.0);
 
-	// sample ghosts:
+	// Sample ghosts
 	ANKI_UNROLL for(U32 i = 0u; i < MAX_GHOSTS; ++i)
 	{
 		const Vec2 offset = fract(flipUv + ghostVec * F32(i));
 
-		F32 weight = length(Vec2(0.5) - offset) / LEN_OF_HALF;
+		ANKI_RP F32 weight = length(Vec2(0.5) - offset) / LEN_OF_HALF;
 		weight = pow(1.0 - weight, 10.0);
 
 		result += textureDistorted(u_tex, u_linearAnyClampSampler, offset, direction, DISTORTION) * weight;
@@ -67,7 +67,7 @@ Vec3 ssLensFlare(Vec2 uv)
 	// Sample halo
 #if ENABLE_HALO
 	const Vec2 haloVec = normalize(ghostVec) * HALO_WIDTH;
-	F32 weight = length(Vec2(0.5) - fract(flipUv + haloVec)) / LEN_OF_HALF;
+	ANKI_RP F32 weight = length(Vec2(0.5) - fract(flipUv + haloVec)) / LEN_OF_HALF;
 	weight = pow(1.0 - weight, 20.0);
 	result += textureDistorted(u_tex, u_linearAnyClampSampler, flipUv + haloVec, direction, DISTORTION)
 			  * (weight * HALO_OPACITY);
@@ -79,31 +79,28 @@ Vec3 ssLensFlare(Vec2 uv)
 	return result;
 }
 
-Vec3 upscale(Vec2 uv)
+ANKI_RP Vec3 upscale(Vec2 uv)
 {
-	Vec3 result = textureLod(u_tex, u_linearAnyClampSampler, uv, 0.0).rgb;
-	result += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(+1, +1)).rgb;
-	result += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(+1, -1)).rgb;
-	result += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(-1, -1)).rgb;
-	result += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(-1, +1)).rgb;
+	const ANKI_RP F32 weight = 1.0 / 5.0;
+	ANKI_RP Vec3 result = textureLod(u_tex, u_linearAnyClampSampler, uv, 0.0).rgb * weight;
+	result += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(+1, +1)).rgb * weight;
+	result += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(+1, -1)).rgb * weight;
+	result += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(-1, -1)).rgb * weight;
+	result += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), uv, 0.0, ivec2(-1, +1)).rgb * weight;
 
-	result *= (1.0 / 5.0);
 	return result;
 }
 
 void main()
 {
-	if((FB_SIZE.x % WORKGROUP_SIZE.x) != 0u || (FB_SIZE.y % WORKGROUP_SIZE.y) != 0u) // This check is free
+	if(skipOutOfBoundsInvocations(WORKGROUP_SIZE, FB_SIZE))
 	{
-		if(gl_GlobalInvocationID.x >= FB_SIZE.x || gl_GlobalInvocationID.y >= FB_SIZE.y)
-		{
-			return;
-		}
+		return;
 	}
 
 	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
 
-	const Vec3 outColor = ssLensFlare(uv) + upscale(uv);
+	const ANKI_RP Vec3 outColor = ssLensFlare(uv) + upscale(uv);
 	imageStore(out_img, IVec2(gl_GlobalInvocationID.xy), Vec4(outColor, 0.0));
 }
 #pragma anki end

+ 5 - 4
AnKi/Shaders/ClusteredShadingCommon.glsl

@@ -44,7 +44,7 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_REFLECTIONS_BIND
 };
 
 layout(set = CLUSTERED_SHADING_SET,
-	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 1) uniform textureCubeArray u_reflectionsTex;
+	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 1) uniform ANKI_RP textureCubeArray u_reflectionsTex;
 layout(set = CLUSTERED_SHADING_SET,
 	   binding = CLUSTERED_SHADING_REFLECTIONS_BINDING + 2) uniform texture2D u_integrationLut;
 #endif
@@ -58,9 +58,10 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_DECALS_BINDING,
 	Decal u_decals2[MAX_VISIBLE_DECALS];
 };
 
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_DECALS_BINDING + 1) uniform texture2D u_diffuseDecalTex;
 layout(set = CLUSTERED_SHADING_SET,
-	   binding = CLUSTERED_SHADING_DECALS_BINDING + 2) uniform texture2D u_specularRoughnessDecalTex;
+	   binding = CLUSTERED_SHADING_DECALS_BINDING + 1) uniform ANKI_RP texture2D u_diffuseDecalTex;
+layout(set = CLUSTERED_SHADING_SET,
+	   binding = CLUSTERED_SHADING_DECALS_BINDING + 2) uniform ANKI_RP texture2D u_specularRoughnessDecalTex;
 #endif
 
 //
@@ -77,7 +78,7 @@ layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_FOG_BINDING, sca
 // GI (2)
 //
 #if defined(CLUSTERED_SHADING_GI_BINDING)
-layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_GI_BINDING) uniform texture3D
+layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_GI_BINDING) uniform ANKI_RP texture3D
 	u_globalIlluminationTextures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES];
 
 layout(set = CLUSTERED_SHADING_SET, binding = CLUSTERED_SHADING_GI_BINDING + 1, scalar) uniform b_giProbes

+ 7 - 1
AnKi/Shaders/Common.glsl

@@ -12,8 +12,13 @@
 
 // Constants
 const F32 EPSILON = 0.000001;
-const F32 FLT_MAX = 3.402823e+38;
+const F16 EPSILON_F16 = 0.0001hf; // Divisions by this should be OK according to http://weitz.de/ieee/
+const ANKI_RP F32 EPSILON_RP = F32(EPSILON_F16);
+
 const U32 MAX_U32 = 0xFFFFFFFFu;
+const F32 MAX_F32 = 3.402823e+38;
+const F16 MAX_F16 = 65504.0hf;
+const F16 MIN_F16 = 0.00006104hf;
 
 const F32 PI = 3.14159265358979323846;
 const U32 MAX_UBO_SIZE = 16384u;
@@ -23,6 +28,7 @@ const U32 MAX_SHARED_MEMORY = 32u * 1024u;
 #define UV_TO_NDC(x_) ((x_)*2.0 - 1.0)
 #define NDC_TO_UV(x_) ((x_)*0.5 + 0.5)
 #define saturate(x_) clamp((x_), 0.0, 1.0)
+#define saturateRp(x_) min((x_), F32(MAX_F16)) // Upper-clamps the value to MAX_F16
 #define mad(a_, b_, c_) fma((a_), (b_), (c_))
 
 // Passes

+ 10 - 11
AnKi/Shaders/DownscaleBlur.glsl

@@ -8,7 +8,7 @@
 #include <AnKi/Shaders/Common.glsl>
 
 layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture2D u_tex;
+layout(set = 0, binding = 1) uniform ANKI_RP texture2D u_tex;
 
 #if defined(ANKI_COMPUTE_SHADER)
 const UVec2 WORKGROUP_SIZE = UVec2(16, 16);
@@ -22,11 +22,11 @@ layout(push_constant, row_major, std430) uniform pc_
 };
 
 Vec2 in_uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(u_fbSize);
-layout(set = 0, binding = 2) writeonly uniform image2D out_img;
-Vec3 out_color;
+layout(set = 0, binding = 2) writeonly uniform ANKI_RP image2D out_img;
+ANKI_RP Vec3 out_color;
 #else
 layout(location = 0) in Vec2 in_uv;
-layout(location = 0) out Vec3 out_color;
+layout(location = 0) out ANKI_RP Vec3 out_color;
 #endif
 
 void main()
@@ -39,13 +39,12 @@ void main()
 	}
 #endif
 
-	out_color = textureLod(u_tex, u_linearAnyClampSampler, in_uv, 0.0).rgb;
-	out_color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), in_uv, 0.0, IVec2(+1, +1)).rgb;
-	out_color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), in_uv, 0.0, IVec2(-1, -1)).rgb;
-	out_color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), in_uv, 0.0, IVec2(+1, -1)).rgb;
-	out_color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), in_uv, 0.0, IVec2(-1, +1)).rgb;
-
-	out_color *= (1.0 / 5.0);
+	const ANKI_RP F32 weight = 1.0 / 5.0;
+	out_color = textureLod(u_tex, u_linearAnyClampSampler, in_uv, 0.0).rgb * weight;
+	out_color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), in_uv, 0.0, IVec2(+1, +1)).rgb * weight;
+	out_color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), in_uv, 0.0, IVec2(-1, -1)).rgb * weight;
+	out_color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), in_uv, 0.0, IVec2(+1, -1)).rgb * weight;
+	out_color += textureLodOffset(sampler2D(u_tex, u_linearAnyClampSampler), in_uv, 0.0, IVec2(-1, +1)).rgb * weight;
 
 #if defined(ANKI_COMPUTE_SHADER)
 	imageStore(out_img, IVec2(gl_GlobalInvocationID.xy), Vec4(out_color, 0.0));

+ 0 - 0
AnKi/Shaders/DownscaleBlur.ankiprog → AnKi/Shaders/DownscaleBlurRaster.ankiprog


+ 15 - 15
AnKi/Shaders/FinalComposite.ankiprog

@@ -24,37 +24,37 @@ layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
 layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
 layout(set = 0, binding = 2) uniform sampler u_trilinearRepeatSampler;
 
-layout(set = 0, binding = 3) uniform texture2D u_lightShadingRt;
-layout(set = 0, binding = 4) uniform texture2D u_ppsBloomLfRt;
-layout(set = 0, binding = 5) uniform texture3D u_lut;
-layout(set = 0, binding = 6) uniform texture2D u_blueNoise;
+layout(set = 0, binding = 3) uniform ANKI_RP texture2D u_lightShadingRt;
+layout(set = 0, binding = 4) uniform ANKI_RP texture2D u_ppsBloomLfRt;
+layout(set = 0, binding = 5) uniform ANKI_RP texture3D u_lut;
+layout(set = 0, binding = 6) uniform ANKI_RP texture2D u_blueNoise;
 layout(set = 0, binding = 7) uniform texture2D u_motionVectorsRt;
 layout(set = 0, binding = 8) uniform texture2D u_depthRt;
 #if DBG_ENABLED
-layout(set = 0, binding = 9) uniform texture2D u_dbgOutlineRt;
+layout(set = 0, binding = 9) uniform ANKI_RP texture2D u_dbgOutlineRt;
 #endif
 
-layout(push_constant, row_major, std430) uniform pc_
+layout(push_constant, row_major, std430) uniform b_pc
 {
 	UVec4 u_frameCountPad3;
 };
 
 layout(location = 0) in Vec2 in_uv;
-layout(location = 0) out Vec3 out_color;
+layout(location = 0) out ANKI_RP Vec3 out_color;
 
-Vec3 colorGrading(Vec3 color)
+ANKI_RP Vec3 colorGrading(ANKI_RP Vec3 color)
 {
-	const Vec3 LUT_SCALE = Vec3((F32(LUT_SIZE) - 1.0) / F32(LUT_SIZE));
-	const Vec3 LUT_OFFSET = Vec3(1.0 / (2.0 * F32(LUT_SIZE)));
+	const ANKI_RP Vec3 LUT_SCALE = Vec3((F32(LUT_SIZE) - 1.0) / F32(LUT_SIZE));
+	const ANKI_RP Vec3 LUT_OFFSET = Vec3(1.0 / (2.0 * F32(LUT_SIZE)));
 
 	color = min(color, Vec3(1.0));
-	const Vec3 lutCoords = color * LUT_SCALE + LUT_OFFSET;
+	const ANKI_RP Vec3 lutCoords = color * LUT_SCALE + LUT_OFFSET;
 	return textureLod(u_lut, u_trilinearRepeatSampler, lutCoords, 0.0).rgb;
 }
 
 void main()
 {
-	const Vec2 uv = in_uv.xy;
+	const Vec2 uv = in_uv;
 
 	if(MOTION_BLUR_SAMPLES > 0u)
 	{
@@ -67,7 +67,7 @@ void main()
 	}
 
 #if BLOOM_ENABLED
-	const Vec3 bloom = textureLod(u_ppsBloomLfRt, u_linearAnyClampSampler, uv, 0.0).rgb;
+	const ANKI_RP Vec3 bloom = textureLod(u_ppsBloomLfRt, u_linearAnyClampSampler, uv, 0.0).rgb;
 	out_color += bloom;
 #endif
 
@@ -75,7 +75,7 @@ void main()
 
 #if BLUE_NOISE
 	const Vec2 bnUvw = Vec2(FB_SIZE) / Vec2(64.0) * uv;
-	Vec3 blueNoise = textureLod(u_blueNoise, u_trilinearRepeatSampler, bnUvw, 0.0).rgb;
+	ANKI_RP Vec3 blueNoise = textureLod(u_blueNoise, u_trilinearRepeatSampler, bnUvw, 0.0).rgb;
 	blueNoise = animateBlueNoise(blueNoise, u_frameCountPad3.x);
 	blueNoise = blueNoise * 2.0 - 1.0;
 	blueNoise = sign(blueNoise) * (1.0 - sqrt(1.0 - abs(blueNoise)));
@@ -84,7 +84,7 @@ void main()
 #endif
 
 #if DBG_ENABLED
-	const Vec4 dbg = textureLod(u_dbgOutlineRt, u_linearAnyClampSampler, uv, 0.0);
+	const ANKI_RP Vec4 dbg = textureLod(u_dbgOutlineRt, u_linearAnyClampSampler, uv, 0.0);
 	out_color = mix(out_color, dbg.rgb, dbg.a);
 #endif
 }

+ 11 - 11
AnKi/Shaders/ForwardShadingCommonFrag.glsl

@@ -13,7 +13,7 @@
 // Global resources
 layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
 layout(set = 0, binding = 1) uniform texture2D u_gbufferDepthRt;
-layout(set = 0, binding = 2) uniform texture3D u_lightVol;
+layout(set = 0, binding = 2) uniform ANKI_RP texture3D u_lightVol;
 #define CLUSTERED_SHADING_SET 0
 #define CLUSTERED_SHADING_UNIFORMS_BINDING 3
 #define CLUSTERED_SHADING_LIGHTS_BINDING 4
@@ -22,12 +22,12 @@ layout(set = 0, binding = 2) uniform texture3D u_lightVol;
 
 layout(location = 0) out Vec4 out_color;
 
-void writeGBuffer(Vec4 color)
+void packGBuffer(Vec4 color)
 {
 	out_color = Vec4(color.rgb, color.a);
 }
 
-Vec4 readAnimatedTextureRgba(texture2DArray tex, sampler sampl, F32 period, Vec2 uv, F32 time)
+ANKI_RP Vec4 readAnimatedTextureRgba(ANKI_RP texture2DArray tex, sampler sampl, F32 period, Vec2 uv, F32 time)
 {
 	const F32 layerCount = F32(textureSize(tex, 0).z);
 	const F32 layer = mod(time * layerCount / period, layerCount);
@@ -37,7 +37,7 @@ Vec4 readAnimatedTextureRgba(texture2DArray tex, sampler sampl, F32 period, Vec2
 // Iterate the clusters to compute the light color
 Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 {
-	diffCol = diffuseLambert(diffCol);
+	diffCol = diffuseLobe(diffCol);
 	Vec3 outColor = Vec3(0.0);
 
 	// Find the cluster and then the light counts
@@ -101,7 +101,7 @@ Vec3 computeLightColorHigh(Vec3 diffCol, Vec3 worldPos)
 }
 
 // Just read the light color from the vol texture
-Vec3 computeLightColorLow(Vec3 diffCol, Vec3 worldPos)
+ANKI_RP Vec3 computeLightColorLow(ANKI_RP Vec3 diffCol, ANKI_RP Vec3 worldPos)
 {
 	const Vec2 uv = gl_FragCoord.xy / u_clusteredShading.m_renderingSize;
 	const F32 linearDepth = linearizeDepth(gl_FragCoord.z, u_clusteredShading.m_near, u_clusteredShading.m_far);
@@ -109,16 +109,16 @@ Vec3 computeLightColorLow(Vec3 diffCol, Vec3 worldPos)
 		Vec3(uv, linearDepth
 					 * (F32(u_clusteredShading.m_zSplitCount) / F32(u_clusteredShading.m_lightVolumeLastZSplit + 1u)));
 
-	const Vec3 light = textureLod(u_lightVol, u_linearAnyClampSampler, uvw, 0.0).rgb;
-	return diffuseLambert(diffCol) * light;
+	const ANKI_RP Vec3 light = textureLod(u_lightVol, u_linearAnyClampSampler, uvw, 0.0).rgb;
+	return diffuseLobe(diffCol) * light;
 }
 
-void particleAlpha(Vec4 color, Vec4 scaleColor, Vec4 biasColor)
+void particleAlpha(ANKI_RP Vec4 color, ANKI_RP Vec4 scaleColor, ANKI_RP Vec4 biasColor)
 {
-	writeGBuffer(color * scaleColor + biasColor);
+	packGBuffer(color * scaleColor + biasColor);
 }
 
-void fog(Vec3 color, F32 fogAlphaScale, F32 fogDistanceOfMaxThikness, F32 zVSpace)
+void fog(ANKI_RP Vec3 color, ANKI_RP F32 fogAlphaScale, ANKI_RP F32 fogDistanceOfMaxThikness, F32 zVSpace)
 {
 	const Vec2 screenSize = 1.0 / u_clusteredShading.m_renderingSize;
 
@@ -134,5 +134,5 @@ void fog(Vec3 color, F32 fogAlphaScale, F32 fogDistanceOfMaxThikness, F32 zVSpac
 
 	zFeatherFactor = min(1.0, diff / fogDistanceOfMaxThikness);
 
-	writeGBuffer(Vec4(color, zFeatherFactor * fogAlphaScale));
+	packGBuffer(Vec4(color, zFeatherFactor * fogAlphaScale));
 }

+ 0 - 5
AnKi/Shaders/ForwardShadingCommonVert.glsl

@@ -11,8 +11,3 @@
 
 // In/out
 layout(location = VERTEX_ATTRIBUTE_ID_POSITION) in Vec3 in_position;
-
-out gl_PerVertex
-{
-	Vec4 gl_Position;
-};

+ 3 - 3
AnKi/Shaders/ForwardShadingFog.ankiprog

@@ -7,9 +7,9 @@ struct PerDraw
 {
 	Mat4 m_ankiMvp;
 	Mat4 m_ankiModelViewMatrix;
-	Vec3 m_fogColor;
-	F32 m_fogAlphaScale;
-	F32 m_fogDistanceOfMaxThikness;
+	ANKI_RP Vec3 m_fogColor;
+	ANKI_RP F32 m_fogAlphaScale;
+	ANKI_RP F32 m_fogDistanceOfMaxThikness;
 };
 
 #pragma anki reflect b_ankiPerDraw

+ 10 - 10
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -13,8 +13,8 @@ struct PerDraw
 #if ANIMATED_TEXTURE == 1
 	F32 m_animationPeriod;
 #endif
-	Vec4 m_colorScale;
-	Vec4 m_colorBias;
+	ANKI_RP Vec4 m_colorScale;
+	ANKI_RP Vec4 m_colorBias;
 };
 
 #pragma anki reflect b_ankiPerDraw
@@ -27,11 +27,11 @@ layout(set = 1, binding = 0, row_major) uniform b_ankiPerDraw
 layout(set = 1, binding = 1) uniform sampler u_ankiGlobalSampler;
 #if ANIMATED_TEXTURE == 0
 #	pragma anki reflect u_diffuseMap
-layout(set = 1, binding = 2) uniform texture2D u_diffuseMap;
+layout(set = 1, binding = 2) uniform ANKI_RP texture2D u_diffuseMap;
 #endif
 #if ANIMATED_TEXTURE == 1
 #	pragma anki reflect u_diffuseMapArr
-layout(set = 1, binding = 2) uniform texture2DArray u_diffuseMapArr;
+layout(set = 1, binding = 2) uniform ANKI_RP texture2DArray u_diffuseMapArr;
 #endif
 
 #pragma anki start vert
@@ -40,7 +40,7 @@ layout(set = 1, binding = 2) uniform texture2DArray u_diffuseMapArr;
 layout(location = VERTEX_ATTRIBUTE_ID_SCALE) in F32 in_scale;
 layout(location = VERTEX_ATTRIBUTE_ID_ALPHA) in F32 in_alpha;
 
-layout(location = 0) flat out F32 out_alpha;
+layout(location = 0) flat out ANKI_RP F32 out_alpha;
 layout(location = 1) out Vec2 out_uv;
 layout(location = 2) out Vec3 out_worldPos;
 
@@ -58,24 +58,24 @@ void main()
 #pragma anki start frag
 #include <AnKi/Shaders/ForwardShadingCommonFrag.glsl>
 
-layout(location = 0) flat in F32 in_alpha;
+layout(location = 0) flat in ANKI_RP F32 in_alpha;
 layout(location = 1) in Vec2 in_uv;
 layout(location = 2) in Vec3 in_worldPos;
 
 void main()
 {
 #if ANIMATED_TEXTURE == 1
-	Vec4 texCol = readAnimatedTextureRgba(u_diffuseMapArr, u_ankiGlobalSampler, u_ankiPerDraw.m_animationPeriod, in_uv,
-										  u_clusteredShading.m_time);
+	ANKI_RP Vec4 texCol = readAnimatedTextureRgba(u_diffuseMapArr, u_ankiGlobalSampler, u_ankiPerDraw.m_animationPeriod,
+												  in_uv, u_clusteredShading.m_time);
 #else
-	Vec4 texCol = texture(u_diffuseMap, u_ankiGlobalSampler, in_uv);
+	ANKI_RP Vec4 texCol = texture(u_diffuseMap, u_ankiGlobalSampler, in_uv);
 #endif
 
 #if LIGHT
 	texCol.rgb = computeLightColorLow(texCol.rgb, in_worldPos);
 #endif
 
-	Vec4 colScale = u_ankiPerDraw.m_colorScale;
+	ANKI_RP Vec4 colScale = u_ankiPerDraw.m_colorScale;
 	colScale.a *= in_alpha;
 	particleAlpha(texCol, colScale, u_ankiPerDraw.m_colorBias);
 }

+ 2 - 2
AnKi/Shaders/Fsr.ankiprog

@@ -11,8 +11,8 @@
 #include <AnKi/Shaders/Functions.glsl>
 
 layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture2D u_tex;
-layout(set = 0, binding = 2) writeonly uniform image2D u_outImg;
+layout(set = 0, binding = 1) uniform ANKI_RP texture2D u_tex;
+layout(set = 0, binding = 2) writeonly uniform ANKI_RP image2D u_outImg;
 
 layout(push_constant, std430) uniform b_pc
 {

+ 10 - 10
AnKi/Shaders/GBufferCommon.glsl

@@ -38,9 +38,9 @@ out gl_PerVertex
 
 #	if ANKI_PASS == PASS_GB
 layout(location = 0) out Vec2 out_uv;
-layout(location = 1) out Vec3 out_normal;
-layout(location = 2) out Vec3 out_tangent;
-layout(location = 3) out Vec3 out_bitangent;
+layout(location = 1) out ANKI_RP Vec3 out_normal;
+layout(location = 2) out ANKI_RP Vec3 out_tangent;
+layout(location = 3) out ANKI_RP Vec3 out_bitangent;
 
 #		if REALLY_USING_PARALLAX
 layout(location = 4) out F32 out_distFromTheCamera;
@@ -59,9 +59,9 @@ layout(location = 7) out Vec2 out_velocity;
 //
 #if defined(ANKI_FRAGMENT_SHADER) && ANKI_PASS == PASS_GB
 layout(location = 0) in Vec2 in_uv;
-layout(location = 1) in Vec3 in_normal;
-layout(location = 2) in Vec3 in_tangent;
-layout(location = 3) in Vec3 in_bitangent;
+layout(location = 1) in ANKI_RP Vec3 in_normal;
+layout(location = 2) in ANKI_RP Vec3 in_tangent;
+layout(location = 3) in ANKI_RP Vec3 in_bitangent;
 
 #	if REALLY_USING_PARALLAX
 layout(location = 4) in F32 in_distFromTheCamera;
@@ -90,18 +90,18 @@ layout(location = 3) out Vec2 out_gbuffer3;
 
 // Write the data to RTs
 #if defined(ANKI_FRAGMENT_SHADER) && ANKI_PASS == PASS_GB
-void writeGBuffer(Vec3 diffColor, Vec3 normal, Vec3 specularColor, F32 roughness, F32 subsurface, Vec3 emission,
-				  F32 metallic, Vec2 velocity)
+void packGBuffer(ANKI_RP Vec3 diffColor, ANKI_RP Vec3 normal, ANKI_RP Vec3 specularColor, ANKI_RP F32 roughness,
+				 ANKI_RP F32 subsurface, ANKI_RP Vec3 emission, ANKI_RP F32 metallic, Vec2 velocity)
 {
 	GbufferInfo g;
 	g.m_diffuse = diffColor;
 	g.m_normal = normal;
-	g.m_specular = specularColor;
+	g.m_f0 = specularColor;
 	g.m_roughness = roughness;
 	g.m_subsurface = subsurface;
 	g.m_emission = (emission.r + emission.g + emission.b) / 3.0;
 	g.m_metallic = metallic;
 	g.m_velocity = velocity;
-	writeGBuffer(g, out_gbuffer0, out_gbuffer1, out_gbuffer2, out_gbuffer3);
+	packGBuffer(g, out_gbuffer0, out_gbuffer1, out_gbuffer2, out_gbuffer3);
 }
 #endif

+ 30 - 30
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -45,36 +45,36 @@
 layout(set = 0, binding = 2) uniform sampler u_ankiGlobalSampler;
 #if DIFFUSE_TEX == 1 && ANKI_PASS == PASS_GB
 #	pragma anki reflect u_diffTex
-layout(set = 0, binding = 3) uniform texture2D u_diffTex;
+layout(set = 0, binding = 3) uniform ANKI_RP texture2D u_diffTex;
 #	define USING_DIFF_TEX 1
 #endif
 #if SPECULAR_TEX == 1 && ANKI_PASS == PASS_GB
 #	pragma anki reflect u_specTex
-layout(set = 0, binding = 4) uniform texture2D u_specTex;
+layout(set = 0, binding = 4) uniform ANKI_RP texture2D u_specTex;
 #	define USING_SPECULAR_TEX 1
 #endif
 #if ROUGHNESS_TEX == 1 && ANKI_PASS == PASS_GB
 #	pragma anki reflect u_roughnessTex
-layout(set = 0, binding = 5) uniform texture2D u_roughnessTex;
+layout(set = 0, binding = 5) uniform ANKI_RP texture2D u_roughnessTex;
 #	define USING_ROUGHNESS_TEX 1
 #endif
 #if NORMAL_TEX == 1 && ANKI_PASS == PASS_GB && ANKI_LOD < 2
 #	pragma anki reflect u_normalTex
-layout(set = 0, binding = 6) uniform texture2D u_normalTex;
+layout(set = 0, binding = 6) uniform ANKI_RP texture2D u_normalTex;
 #	define USING_NORMAL_TEX 1
 #endif
 #if METAL_TEX == 1 && ANKI_PASS == PASS_GB
 #	pragma anki reflect u_metallicTex
-layout(set = 0, binding = 7) uniform texture2D u_metallicTex;
+layout(set = 0, binding = 7) uniform ANKI_RP texture2D u_metallicTex;
 #	define USING_METALLIC_TEX 1
 #endif
 #if REALLY_USING_PARALLAX
 #	pragma anki reflect u_heightTex
-layout(set = 0, binding = 8) uniform texture2D u_heightTex;
+layout(set = 0, binding = 8) uniform ANKI_RP texture2D u_heightTex;
 #endif
 #if EMISSIVE_TEX == 1 && ANKI_PASS == PASS_GB
 #	pragma anki reflect u_emissiveTex
-layout(set = 0, binding = 9) uniform texture2D u_emissiveTex;
+layout(set = 0, binding = 9) uniform ANKI_RP texture2D u_emissiveTex;
 #	define USING_EMISSIVE_TEX 1
 #endif
 
@@ -160,16 +160,16 @@ Vec3 g_position = in_position;
 #if ANKI_PASS == PASS_GB
 Vec3 g_prevPosition = in_position;
 Vec2 g_uv = in_uv;
-Vec3 g_normal = in_normal;
-Vec4 g_tangent = in_tangent;
+ANKI_RP Vec3 g_normal = in_normal;
+ANKI_RP Vec4 g_tangent = in_tangent;
 #endif
 
 // Perform skinning
 #if ANKI_BONES
 void skinning()
 {
-	Mat4 skinMat = u_ankiBoneTransforms[in_boneIndices[0]] * in_boneWeights[0];
-	Mat4 prevSkinMat = u_ankiPrevFrameBoneTransforms[in_boneIndices[0]] * in_boneWeights[0];
+	ANKI_RP Mat4 skinMat = u_ankiBoneTransforms[in_boneIndices[0]] * in_boneWeights[0];
+	ANKI_RP Mat4 prevSkinMat = u_ankiPrevFrameBoneTransforms[in_boneIndices[0]] * in_boneWeights[0];
 	ANKI_UNROLL for(U32 i = 1u; i < 4u; ++i)
 	{
 		skinMat += u_ankiBoneTransforms[in_boneIndices[i]] * in_boneWeights[i];
@@ -335,16 +335,16 @@ Vec2 computeTextureCoordParallax(texture2D heightMap, sampler sampl, Vec2 uv, F3
 
 // Do normal mapping
 #if ANKI_PASS == PASS_GB
-Vec3 readNormalFromTexture(texture2D map, sampler sampl, highp Vec2 texCoords)
+ANKI_RP Vec3 readNormalFromTexture(ANKI_RP texture2D map, sampler sampl, highp Vec2 texCoords)
 {
 	// First read the texture
-	const Vec3 nAtTangentspace = normalize((texture(map, sampl, texCoords).rgb - 0.5) * 2.0);
+	const ANKI_RP Vec3 nAtTangentspace = normalize((texture(map, sampl, texCoords).rgb - 0.5) * 2.0);
 
-	const Vec3 n = normalize(in_normal);
-	const Vec3 t = normalize(in_tangent);
-	const Vec3 b = normalize(in_bitangent);
+	const ANKI_RP Vec3 n = normalize(in_normal);
+	const ANKI_RP Vec3 t = normalize(in_tangent);
+	const ANKI_RP Vec3 b = normalize(in_bitangent);
 
-	const Mat3 tbnMat = Mat3(t, b, n);
+	const ANKI_RP Mat3 tbnMat = Mat3(t, b, n);
 
 	return tbnMat * nAtTangentspace;
 }
@@ -361,39 +361,39 @@ void main()
 #	endif
 
 #	if defined(USING_DIFF_TEX)
-	const Vec3 diffColor = texture(u_diffTex, u_ankiGlobalSampler, uv).rgb;
+	const ANKI_RP Vec3 diffColor = texture(u_diffTex, u_ankiGlobalSampler, uv).rgb;
 #	else
-	const Vec3 diffColor = u_ankiPerDraw.m_diffColor;
+	const ANKI_RP Vec3 diffColor = u_ankiPerDraw.m_diffColor;
 #	endif
 
 #	if defined(USING_SPECULAR_TEX)
-	const Vec3 specColor = texture(u_specTex, u_ankiGlobalSampler, uv).rgb;
+	const ANKI_RP Vec3 specColor = texture(u_specTex, u_ankiGlobalSampler, uv).rgb;
 #	else
-	const Vec3 specColor = u_ankiPerDraw.m_specColor;
+	const ANKI_RP Vec3 specColor = u_ankiPerDraw.m_specColor;
 #	endif
 
 #	if defined(USING_ROUGHNESS_TEX)
-	const F32 roughness = texture(u_roughnessTex, u_ankiGlobalSampler, uv).g;
+	const ANKI_RP F32 roughness = texture(u_roughnessTex, u_ankiGlobalSampler, uv).g;
 #	else
-	const F32 roughness = u_ankiPerDraw.m_roughness;
+	const ANKI_RP F32 roughness = u_ankiPerDraw.m_roughness;
 #	endif
 
 #	if defined(USING_METALLIC_TEX)
-	const F32 metallic = texture(u_metallicTex, u_ankiGlobalSampler, uv).b;
+	const ANKI_RP F32 metallic = texture(u_metallicTex, u_ankiGlobalSampler, uv).b;
 #	else
-	const F32 metallic = u_ankiPerDraw.m_metallic;
+	const ANKI_RP F32 metallic = u_ankiPerDraw.m_metallic;
 #	endif
 
 #	if defined(USING_NORMAL_TEX)
-	const Vec3 normal = readNormalFromTexture(u_normalTex, u_ankiGlobalSampler, uv);
+	const ANKI_RP Vec3 normal = readNormalFromTexture(u_normalTex, u_ankiGlobalSampler, uv);
 #	else
-	const Vec3 normal = normalize(in_normal);
+	const ANKI_RP Vec3 normal = normalize(in_normal);
 #	endif
 
 #	if defined(USING_EMISSIVE_TEX)
-	const Vec3 emission = texture(u_emissiveTex, u_ankiGlobalSampler, uv).rgb;
+	const ANKI_RP Vec3 emission = texture(u_emissiveTex, u_ankiGlobalSampler, uv).rgb;
 #	else
-	const Vec3 emission = u_ankiPerDraw.m_emission;
+	const ANKI_RP Vec3 emission = u_ankiPerDraw.m_emission;
 #	endif
 
 #	if ANKI_VELOCITY || ANKI_BONES
@@ -402,7 +402,7 @@ void main()
 	const Vec2 velocity = Vec2(1.0);
 #	endif
 
-	writeGBuffer(diffColor, normal, specColor, roughness, u_ankiPerDraw.m_subsurface, emission, metallic, velocity);
+	packGBuffer(diffColor, normal, specColor, roughness, u_ankiPerDraw.m_subsurface, emission, metallic, velocity);
 #elif ANKI_PASS == PASS_EZ
 	out_gbuffer0 = Vec4(0.0);
 	out_gbuffer1 = Vec4(0.0);

+ 9 - 9
AnKi/Shaders/GBufferGpuParticles.ankiprog

@@ -11,12 +11,12 @@
 struct PerDraw
 {
 	Mat4 m_ankiMvp;
-	Vec3 m_diffColor;
-	F32 m_roughness;
-	Vec3 m_specColor;
-	F32 m_metallic;
-	Vec3 m_initialEmission;
-	Vec3 m_finalEmission;
+	ANKI_RP Vec3 m_diffColor;
+	ANKI_RP F32 m_roughness;
+	ANKI_RP Vec3 m_specColor;
+	ANKI_RP F32 m_metallic;
+	ANKI_RP Vec3 m_initialEmission;
+	ANKI_RP Vec3 m_finalEmission;
 };
 
 #pragma anki reflect b_ankiPerDraw
@@ -78,13 +78,13 @@ void main()
 	GbufferInfo g;
 	g.m_diffuse = u_ankiPerDraw.m_diffColor;
 	g.m_normal = u_minusCameraZ;
-	g.m_specular = u_ankiPerDraw.m_specColor;
+	g.m_f0 = u_ankiPerDraw.m_specColor;
 	g.m_roughness = u_ankiPerDraw.m_roughness;
 	g.m_subsurface = 0.0;
-	const Vec3 emission = mix(u_ankiPerDraw.m_initialEmission, u_ankiPerDraw.m_finalEmission, in_lifeFactor);
+	const ANKI_RP Vec3 emission = mix(u_ankiPerDraw.m_initialEmission, u_ankiPerDraw.m_finalEmission, in_lifeFactor);
 	g.m_emission = (emission.r + emission.g + emission.b) / 3.0;
 	g.m_metallic = u_ankiPerDraw.m_metallic;
 	g.m_velocity = in_velocity;
-	writeGBuffer(g, out_gbuffer0, out_gbuffer1, out_gbuffer2, out_gbuffer3);
+	packGBuffer(g, out_gbuffer0, out_gbuffer1, out_gbuffer2, out_gbuffer3);
 }
 #pragma anki end

+ 15 - 15
AnKi/Shaders/GBufferPost.ankiprog

@@ -16,7 +16,7 @@ ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 3u);
 #include <AnKi/Shaders/Functions.glsl>
 
 layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture2D u_msDepthRt;
+layout(set = 0, binding = 1) uniform texture2D u_depthTex;
 layout(set = 0, binding = 2) uniform sampler u_trilinearRepeatSampler;
 
 #define CLUSTERED_SHADING_SET 0
@@ -27,8 +27,8 @@ layout(set = 0, binding = 2) uniform sampler u_trilinearRepeatSampler;
 
 layout(location = 0) in Vec2 in_uv;
 
-layout(location = 0) out Vec4 out_color0;
-layout(location = 1) out Vec4 out_color1;
+layout(location = 0) out ANKI_RP Vec4 out_diffuse;
+layout(location = 1) out ANKI_RP Vec4 out_roughnessMetallicF0;
 
 void main()
 {
@@ -39,11 +39,11 @@ void main()
 	// 2nd decal: d''=d'*(1-f')+D'*f' <=> d''=d*(1-f)*(1-f')+D*f*(1-f')+D'*f'
 	// By looking at the trend we will have to multiply the gbuffer.diff with: (1-f)*(1-f') ... (1-f'''')
 
-	Vec4 outDiffuse = Vec4(0.0, 0.0, 0.0, 1.0);
-	Vec4 outSpecular = Vec4(0.0, 0.0, 0.0, 1.0);
+	ANKI_RP Vec4 diffuse = Vec4(0.0, 0.0, 0.0, 1.0);
+	ANKI_RP Vec4 roughnessMetallicF0 = Vec4(0.0, 0.0, 0.0, 1.0);
 
 	// Get worldPos
-	const F32 depth = textureLod(u_msDepthRt, u_nearestAnyClampSampler, in_uv, 0.0).r;
+	const F32 depth = textureLod(u_depthTex, u_nearestAnyClampSampler, in_uv, 0.0).r;
 	const Vec2 ndc = UV_TO_NDC(in_uv);
 	const Vec4 worldPos4 = u_clusteredShading.m_matrices.m_invertedViewProjectionJitter * Vec4(ndc, depth, 1.0);
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
@@ -79,24 +79,24 @@ void main()
 
 		// Read diffuse
 		const Vec2 diffUv = mad(texCoords2, decal.m_diffuseUv.zw, decal.m_diffuseUv.xy);
-		const Vec4 decalDiff = texture(u_diffuseDecalTex, u_trilinearRepeatSampler, diffUv);
+		const ANKI_RP Vec4 decalDiff = texture(u_diffuseDecalTex, u_trilinearRepeatSampler, diffUv);
 
 		// Read roughness
 		const Vec2 specUv = mad(texCoords2, decal.m_normRoughnessUv.zw, decal.m_normRoughnessUv.xy);
-		const Vec3 spec = texture(u_specularRoughnessDecalTex, u_trilinearRepeatSampler, specUv).rgb;
+		const ANKI_RP Vec3 spec = texture(u_specularRoughnessDecalTex, u_trilinearRepeatSampler, specUv).rgb;
 
 		// Update diffuse
-		F32 f = decalDiff.a * decal.m_blendFactors[0];
-		outDiffuse.rgb = outDiffuse.rgb * (1.0 - f) + decalDiff.rgb * f;
-		outDiffuse.a *= (1.0 - f);
+		ANKI_RP F32 f = decalDiff.a * decal.m_blendFactors[0];
+		diffuse.rgb = diffuse.rgb * (1.0 - f) + decalDiff.rgb * f;
+		diffuse.a *= (1.0 - f);
 
 		// Update specular
 		f = decalDiff.a * decal.m_blendFactors[1];
-		outSpecular.rgb = outSpecular.rgb * (1.0 - f) + spec.rgb * f;
-		outSpecular.a *= (1.0 - f);
+		roughnessMetallicF0.rgb = roughnessMetallicF0.rgb * (1.0 - f) + spec.rgb * f;
+		roughnessMetallicF0.a *= (1.0 - f);
 	}
 
-	out_color0 = outDiffuse;
-	out_color1 = outSpecular;
+	out_diffuse = diffuse;
+	out_roughnessMetallicF0 = roughnessMetallicF0;
 }
 #pragma anki end

+ 24 - 24
AnKi/Shaders/Include/ClusteredShadingTypes.h

@@ -35,17 +35,17 @@ const U32 MAX_VISIBLE_REFLECTION_PROBES = 16u;
 const U32 MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES = 8u;
 
 // Other consts
-const F32 CLUSTER_OBJECT_FRUSTUM_NEAR_PLANE = 0.1f / 4.0f; ///< The near plane of various clusterer object frustums.
+const ANKI_RP F32 CLUSTER_OBJECT_FRUSTUM_NEAR_PLANE = 0.1f / 4.0f; ///< Near plane of various clusterer object frustums.
 const U32 MAX_SHADOW_CASCADES2 = 4u;
-const F32 SUBSURFACE_MIN = 0.01f;
+const ANKI_RP F32 SUBSURFACE_MIN = 0.01f;
 
 /// Point light.
 struct PointLight
 {
 	Vec3 m_position; ///< Position in world space.
-	Vec3 m_diffuseColor;
-	F32 m_radius; ///< Radius
-	F32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	ANKI_RP Vec3 m_diffuseColor;
+	ANKI_RP F32 m_radius; ///< Radius
+	ANKI_RP F32 m_squareRadiusOverOne; ///< 1/(radius^2).
 	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
 	F32 m_shadowAtlasTileScale; ///< UV scale for all tiles.
 	Vec2 m_shadowAtlasTileOffsets[6u];
@@ -58,13 +58,13 @@ struct SpotLight
 {
 	Vec3 m_position; ///< Position in world space.
 	Vec3 m_edgePoints[4u]; ///< Edge points in world space.
-	Vec3 m_diffuseColor;
-	F32 m_radius; ///< Max distance.
-	F32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	ANKI_RP Vec3 m_diffuseColor;
+	ANKI_RP F32 m_radius; ///< Max distance.
+	ANKI_RP F32 m_squareRadiusOverOne; ///< 1/(radius^2).
 	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
-	Vec3 m_direction; ///< Light direction.
-	F32 m_outerCos;
-	F32 m_innerCos;
+	ANKI_RP Vec3 m_direction; ///< Light direction.
+	ANKI_RP F32 m_outerCos;
+	ANKI_RP F32 m_innerCos;
 	Vec2 m_padding;
 	Mat4 m_textureMatrix;
 };
@@ -75,13 +75,13 @@ ANKI_SHADER_STATIC_ASSERT(sizeof(SpotLight) == _ANKI_SIZEOF_SpotLight);
 struct SpotLightBinning
 {
 	Vec3 m_edgePoints[5u]; ///< Edge points in world space.
-	Vec3 m_diffuseColor;
-	F32 m_radius; ///< Max distance.
-	F32 m_squareRadiusOverOne; ///< 1/(radius^2).
+	ANKI_RP Vec3 m_diffuseColor;
+	ANKI_RP F32 m_radius; ///< Max distance.
+	ANKI_RP F32 m_squareRadiusOverOne; ///< 1/(radius^2).
 	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
-	Vec3 m_direction; ///< Light direction.
-	F32 m_outerCos;
-	F32 m_innerCos;
+	ANKI_RP Vec3 m_direction; ///< Light direction.
+	ANKI_RP F32 m_outerCos;
+	ANKI_RP F32 m_innerCos;
 	Vec2 m_padding;
 	Mat4 m_textureMatrix;
 };
@@ -92,12 +92,12 @@ ANKI_SHADER_STATIC_ASSERT(alignof(SpotLightBinning) == alignof(SpotLight));
 /// Directional light (sun).
 struct DirectionalLight
 {
-	Vec3 m_diffuseColor;
+	ANKI_RP Vec3 m_diffuseColor;
 	U32 m_cascadeCount; ///< If it's zero then it doesn't cast shadow.
-	Vec3 m_direction;
+	ANKI_RP Vec3 m_direction;
 	U32 m_active;
-	F32 m_effectiveShadowDistance;
-	F32 m_shadowCascadesDistancePower;
+	ANKI_RP F32 m_effectiveShadowDistance;
+	ANKI_RP F32 m_shadowCascadesDistancePower;
 	U32 m_shadowLayer; ///< Shadow layer used in RT shadows. Also used to show that it doesn't cast shadow.
 	U32 m_padding;
 	Mat4 m_textureMatrices[MAX_SHADOW_CASCADES2];
@@ -121,7 +121,7 @@ struct Decal
 {
 	Vec4 m_diffuseUv;
 	Vec4 m_normRoughnessUv;
-	Vec4 m_blendFactors;
+	ANKI_RP Vec4 m_blendFactors;
 	Mat4 m_textureMatrix;
 	Mat4 m_invertedTransform;
 	Vec3 m_obbExtend;
@@ -136,7 +136,7 @@ struct FogDensityVolume
 	Vec3 m_aabbMinOrSphereCenter;
 	U32 m_isBox;
 	Vec3 m_aabbMaxOrSphereRadiusSquared;
-	F32 m_density;
+	ANKI_RP F32 m_density;
 };
 const U32 _ANKI_SIZEOF_FogDensityVolume = 2u * ANKI_SIZEOF(Vec4);
 ANKI_SHADER_STATIC_ASSERT(sizeof(FogDensityVolume) == _ANKI_SIZEOF_FogDensityVolume);
@@ -151,7 +151,7 @@ struct GlobalIlluminationProbe
 	F32 m_halfTexelSizeU; ///< (1.0 / textureSize(texArr[textureIndex]).x) / 2.0
 
 	/// Used to calculate a factor that is zero when fragPos is close to AABB bounds and 1.0 at fadeDistance and less.
-	F32 m_fadeDistance;
+	ANKI_RP F32 m_fadeDistance;
 };
 const U32 _ANKI_SIZEOF_GlobalIlluminationProbe = 9u * ANKI_SIZEOF(U32);
 ANKI_SHADER_STATIC_ASSERT(sizeof(GlobalIlluminationProbe) == _ANKI_SIZEOF_GlobalIlluminationProbe);

+ 2 - 0
AnKi/Shaders/Include/Common.h

@@ -36,6 +36,8 @@ ANKI_END_NAMESPACE
 
 #	define ANKI_CPP_CODE(x) x
 
+#	define ANKI_RP
+
 //
 // Macros & functions GLSL
 //

+ 4 - 4
AnKi/Shaders/Include/IndirectDiffuseTypes.h

@@ -14,11 +14,11 @@ struct IndirectDiffuseUniforms
 	UVec2 m_viewportSize;
 	Vec2 m_viewportSizef;
 	Vec4 m_projectionMat;
-	F32 m_radius; ///< In meters.
+	ANKI_RP F32 m_radius; ///< In meters.
 	U32 m_sampleCount;
-	F32 m_sampleCountf;
-	F32 m_ssaoBias;
-	F32 m_ssaoStrength;
+	ANKI_RP F32 m_sampleCountf;
+	ANKI_RP F32 m_ssaoBias;
+	ANKI_RP F32 m_ssaoStrength;
 	F32 m_padding[3u];
 };
 

+ 1 - 1
AnKi/Shaders/Include/LensFlareTypes.h

@@ -13,7 +13,7 @@ ANKI_BEGIN_NAMESPACE
 struct LensFlareSprite
 {
 	Vec4 m_posScale; // xy: Position, zw: Scale
-	Vec4 m_color;
+	ANKI_RP Vec4 m_color;
 	Vec4 m_depthPad3;
 };
 

+ 17 - 17
AnKi/Shaders/IndirectDiffuse.ankiprog

@@ -32,10 +32,10 @@ layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y) in;
 layout(set = 0, binding = 4) writeonly uniform image2D u_outImage;
 
 layout(set = 0, binding = 5) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 6) uniform texture2D u_gbufferRt2;
+layout(set = 0, binding = 6) uniform ANKI_RP texture2D u_gbufferRt2;
 layout(set = 0, binding = 7) uniform texture2D u_depthRt;
-layout(set = 0, binding = 8) uniform texture2D u_lightBufferRt;
-layout(set = 0, binding = 9) uniform texture2D u_historyTex;
+layout(set = 0, binding = 8) uniform ANKI_RP texture2D u_lightBufferRt;
+layout(set = 0, binding = 9) uniform ANKI_RP texture2D u_historyTex;
 layout(set = 0, binding = 10) uniform texture2D u_motionVectorsTex;
 layout(set = 0, binding = 11) uniform texture2D u_motionVectorsRejectionTex;
 
@@ -62,7 +62,7 @@ void main()
 	const Vec2 ndc = UV_TO_NDC(uv);
 
 	// Get normal
-	const Vec3 worldNormal = readNormalFromGBuffer(u_gbufferRt2, u_linearAnyClampSampler, uv);
+	const Vec3 worldNormal = unpackNormalFromGBuffer(textureLod(u_gbufferRt2, u_linearAnyClampSampler, uv, 0.0));
 	const Vec3 viewNormal = u_clusteredShading.m_matrices.m_viewRotation * worldNormal;
 
 	// Get origin
@@ -73,15 +73,15 @@ void main()
 	const Vec3 viewPos = v4.xyz / v4.w;
 
 	// SSGI
-	Vec3 outColor = Vec3(0.0);
-	F32 ssao = 0.0;
+	ANKI_RP Vec3 outColor = Vec3(0.0);
+	ANKI_RP F32 ssao = 0.0;
 	if(ENABLE_SSGI)
 	{
 		// Find the projected radius
-		const Vec3 sphereLimit = viewPos + Vec3(u_unis.m_radius, 0.0, 0.0);
-		const Vec4 projSphereLimit = cheapProject(Vec4(sphereLimit, 1.0));
-		const Vec2 projSphereLimit2 = projSphereLimit.xy / projSphereLimit.w;
-		const F32 projRadius = length(projSphereLimit2 - ndc);
+		const ANKI_RP Vec3 sphereLimit = viewPos + Vec3(u_unis.m_radius, 0.0, 0.0);
+		const ANKI_RP Vec4 projSphereLimit = cheapProject(Vec4(sphereLimit, 1.0));
+		const ANKI_RP Vec2 projSphereLimit2 = projSphereLimit.xy / projSphereLimit.w;
+		const ANKI_RP F32 projRadius = length(projSphereLimit2 - ndc);
 
 		// Loop to compute
 		const UVec2 random = rand3DPCG16(UVec3(gl_GlobalInvocationID.xy, u_clusteredShading.m_frame)).xy;
@@ -100,7 +100,7 @@ void main()
 			// Compute factor
 			const Vec3 dir = s - viewPos;
 			const F32 len = length(dir);
-			const Vec3 n = dir / len;
+			const Vec3 n = normalize(dir);
 			const F32 NoL = max(0.0, dot(viewNormal, n));
 			// const F32 distFactor = 1.0 - sin(min(1.0, len / u_unis.m_radius) * PI / 2.0);
 			const F32 distFactor = 1.0 - min(1.0, len / u_unis.m_radius);
@@ -120,14 +120,14 @@ void main()
 
 			// Append color
 			const F32 w = distFactor * NoL;
-			const Vec3 c = textureLod(u_lightBufferRt, u_linearAnyClampSampler, lastFrameUv, 100.0).xyz;
+			const ANKI_RP Vec3 c = textureLod(u_lightBufferRt, u_linearAnyClampSampler, lastFrameUv, 100.0).xyz;
 			outColor += c * w;
 
 			// Compute SSAO as well
 			ssao += max(dot(viewNormal, dir) + u_unis.m_ssaoBias, EPSILON) / max(len * len, EPSILON);
 		}
 
-		const F32 scount = 1.0 / u_unis.m_sampleCountf;
+		const ANKI_RP F32 scount = 1.0 / u_unis.m_sampleCountf;
 		outColor *= scount * 2.0 * PI;
 		ssao *= scount;
 	}
@@ -138,7 +138,7 @@ void main()
 	{
 		// Sample probes
 
-		Vec3 probeColor = Vec3(0.0);
+		ANKI_RP Vec3 probeColor = Vec3(0.0);
 
 		// Get the cluster
 		Cluster cluster = getClusterFragCoord(Vec3(fragCoord * 2.0, depth));
@@ -176,8 +176,8 @@ void main()
 				totalBlendWeight += blendWeight;
 
 				// Sample
-				const Vec3 c = sampleGlobalIllumination(worldPos, worldNormal, probe, u_globalIlluminationTextures,
-														u_linearAnyClampSampler);
+				const ANKI_RP Vec3 c = sampleGlobalIllumination(worldPos, worldNormal, probe,
+																u_globalIlluminationTextures, u_linearAnyClampSampler);
 				probeColor += c * blendWeight;
 			}
 
@@ -209,7 +209,7 @@ void main()
 		const F32 blendFactor = mix(lowestBlendFactor, 1.0, historyRejectionFactor);
 
 		// Blend with history
-		const Vec3 history = textureLod(u_historyTex, u_linearAnyClampSampler, historyUv, 0.0).rgb;
+		const ANKI_RP Vec3 history = textureLod(u_historyTex, u_linearAnyClampSampler, historyUv, 0.0).rgb;
 		outColor = mix(history, outColor, blendFactor);
 	}
 

+ 9 - 8
AnKi/Shaders/IndirectDiffuseDenoise.ankiprog

@@ -3,7 +3,7 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#pragma anki mutator BLUR_ORIENTATION 0 1 // 0: in X asix, 1: in Y axis
+#pragma anki mutator BLUR_ORIENTATION 0 1 // 0: in X axis, 1: in Y axis
 
 #pragma anki start comp
 
@@ -16,10 +16,10 @@ const UVec2 WORKGROUP_SIZE = UVec2(8u, 8u);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y) in;
 
 layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture2D u_toDenoiseTex;
+layout(set = 0, binding = 1) uniform ANKI_RP texture2D u_toDenoiseTex;
 layout(set = 0, binding = 2) uniform texture2D u_depthTex;
-layout(set = 0, binding = 3) uniform texture2D u_gbuffer2Tex;
-layout(set = 0, binding = 4) writeonly uniform image2D u_outImg;
+layout(set = 0, binding = 3) uniform ANKI_RP texture2D u_gbuffer2Tex;
+layout(set = 0, binding = 4) writeonly uniform ANKI_RP image2D u_outImg;
 
 layout(push_constant, std430, row_major) uniform b_pc
 {
@@ -51,11 +51,12 @@ void main()
 	}
 
 	const Vec3 positionCenter = unproject(UV_TO_NDC(uv), depthCenter);
-	const Vec3 normalCenter = readNormalFromGBuffer(u_gbuffer2Tex, u_linearAnyClampSampler, uv);
+	const ANKI_RP Vec3 normalCenter =
+		unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, uv, 0.0));
 
 	// Sample
-	F32 weight = EPSILON;
-	Vec3 color = Vec3(0.0);
+	ANKI_RP F32 weight = EPSILON_RP;
+	ANKI_RP Vec3 color = Vec3(0.0);
 
 	for(F32 i = -u_unis.m_sampleCountDiv2; i <= u_unis.m_sampleCountDiv2; i += 1.0)
 	{
@@ -71,7 +72,7 @@ void main()
 		const Vec3 normalTap =
 			unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, sampleUv, 0.0));
 
-		F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
+		ANKI_RP F32 w = calculateBilateralWeightPlane(positionCenter, normalCenter, positionTap, normalTap, 1.0);
 		// w *= gaussianWeight(0.4, abs(F32(i)) / (sampleCount + 1.0));
 		weight += w;
 

+ 20 - 19
AnKi/Shaders/IrradianceDice.ankiprog

@@ -26,7 +26,7 @@ layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
 #if LIGHT_SHADING_TEX == 0
 layout(set = 0, binding = 1) uniform texture2D u_lightShadingTex2d;
 #else
-layout(set = 0, binding = 1) uniform textureCube u_lightShadingTexCube;
+layout(set = 0, binding = 1) uniform ANKI_RP textureCube u_lightShadingTexCube;
 #endif
 
 #if SECOND_BOUNCE == 1
@@ -50,13 +50,13 @@ layout(push_constant, std430) uniform pc_
 #else
 layout(set = 0, binding = 4, std430) writeonly buffer ssbo1_
 {
-	Vec4 u_irradianceDisceResults[6u];
+	ANKI_RP Vec4 u_irradianceDisceResults[6u];
 };
 #endif
 
-shared Vec3 s_diceIrradiance[6u];
+shared ANKI_RP Vec3 s_diceIrradiance[6u];
 
-Vec3 sampleLightShadingTexture(const U32 face)
+ANKI_RP Vec3 sampleLightShadingTexture(const U32 face)
 {
 #if LIGHT_SHADING_TEX == 0
 	const Vec2 INPUT_TEXTURES_SIZE = Vec2(U32(WORKGROUP_SIZE_XY) * 6u, U32(WORKGROUP_SIZE_XY));
@@ -90,9 +90,9 @@ void main()
 		const Vec3 r = getCubemapDirection(ndc, f);
 
 		// Compute integral part
-		const F32 lambert = max(0.0, dot(r, diceDir));
-		const Vec3 lightShading = sampleLightShadingTexture(f);
-		const Vec3 irradiance = lightShading * lambert * cubeCoordSolidAngle(ndc, WORKGROUP_SIZE_XY_F);
+		const ANKI_RP F32 lambert = max(0.0, dot(r, diceDir));
+		const ANKI_RP Vec3 lightShading = sampleLightShadingTexture(f);
+		const ANKI_RP Vec3 irradiance = lightShading * lambert * cubeCoordSolidAngle(ndc, WORKGROUP_SIZE_XY_F);
 
 		// Store
 		u_integrationResults[f * WORKGROUP_SIZE + gl_LocalInvocationID.y * U32(WORKGROUP_SIZE_XY)
@@ -137,25 +137,26 @@ void main()
 		const Vec3 r = getCubemapDirection(ndc, f);
 
 		// Compute integral part
-		const F32 lambert = max(0.0, dot(r, diceDir));
+		const ANKI_RP F32 lambert = max(0.0, dot(r, diceDir));
 
 		// Read the gbuffer
 		Vec2 gbufferUv = (Vec2(gl_LocalInvocationID.xy) + 0.5) / WORKGROUP_SIZE_XY_F;
 		gbufferUv.x *= 1.0 / 6.0;
 		gbufferUv.x += (1.0 / 6.0) * F32(f);
 		GbufferInfo gbuffer;
-		readGBuffer(u_gbufferTex[0u], u_gbufferTex[1u], u_gbufferTex[2u], u_nearestAnyClampSampler, gbufferUv, 0.0,
-					gbuffer);
+		unpackGBufferNoVelocity(textureLod(u_gbufferTex[0u], u_nearestAnyClampSampler, gbufferUv, 0.0),
+								textureLod(u_gbufferTex[1u], u_nearestAnyClampSampler, gbufferUv, 0.0),
+								textureLod(u_gbufferTex[2u], u_nearestAnyClampSampler, gbufferUv, 0.0), gbuffer);
 
 		// Sample irradiance
-		Vec3 firstBounceIrradiance =
+		ANKI_RP Vec3 firstBounceIrradiance =
 			sampleAmbientDice(s_diceIrradiance[0], s_diceIrradiance[1], s_diceIrradiance[2], s_diceIrradiance[3],
 							  s_diceIrradiance[4], s_diceIrradiance[5], gbuffer.m_normal);
 		firstBounceIrradiance = gbuffer.m_diffuse * firstBounceIrradiance;
 
 		// Compute 2nd bounce
-		const Vec3 lightShading = sampleLightShadingTexture(f);
-		const Vec3 irradiance =
+		const ANKI_RP Vec3 lightShading = sampleLightShadingTexture(f);
+		const ANKI_RP Vec3 irradiance =
 			(firstBounceIrradiance + lightShading * lambert) * cubeCoordSolidAngle(ndc, WORKGROUP_SIZE_XY_F);
 
 		// Store
@@ -190,20 +191,20 @@ void main()
 
 #if DEBUG_MODE == 0
 #	if SECOND_BOUNCE == 1
-		Vec3 irradiance = u_integrationResults[f * WORKGROUP_SIZE + 0u].xyz;
+		ANKI_RP Vec3 irradiance = u_integrationResults[f * WORKGROUP_SIZE + 0u].xyz;
 #	else
-		Vec3 irradiance = s_diceIrradiance[f];
+		ANKI_RP Vec3 irradiance = s_diceIrradiance[f];
 #	endif
-		const Vec3 toStoreValue = irradiance * PI;
+		const ANKI_RP Vec3 toStoreValue = irradiance * PI;
 #elif DEBUG_MODE == 1
-		const Vec3 toStoreValue = colorPerCubeFace(f);
+		const ANKI_RP Vec3 toStoreValue = colorPerCubeFace(f);
 #else
 		const UVec3 volumeSize = UVec3(imageSize(u_irradianceVolume));
 		const UVec3 subvolumeSize = UVec3(volumeSize.x / 6u, volumeSize.y, volumeSize.z);
 		const U32 cellIdx =
 			u_volumeTexel.z * subvolumeSize.x * subvolumeSize.y + u_volumeTexel.y * subvolumeSize.x + u_volumeTexel.x;
-		const F32 headmapFactor = F32(cellIdx) / F32(subvolumeSize.x * subvolumeSize.y * subvolumeSize.z);
-		const Vec3 toStoreValue = heatmap(headmapFactor);
+		const ANKI_RP F32 headmapFactor = F32(cellIdx) / F32(subvolumeSize.x * subvolumeSize.y * subvolumeSize.z);
+		const ANKI_RP Vec3 toStoreValue = heatmap(headmapFactor);
 #endif
 
 #if STORE_LOCATION == 0

+ 5 - 10
AnKi/Shaders/LensFlareSprite.ankiprog

@@ -14,12 +14,7 @@ layout(std140, set = 0, binding = 0) readonly buffer ssbo00
 };
 
 layout(location = 0) out Vec3 out_uv;
-layout(location = 1) flat out Vec4 out_color;
-
-out gl_PerVertex
-{
-	Vec4 gl_Position;
-};
+layout(location = 1) flat out ANKI_RP Vec4 out_color;
 
 void main()
 {
@@ -41,16 +36,16 @@ void main()
 #include <AnKi/Shaders/Common.glsl>
 
 layout(set = 0, binding = 1) uniform sampler u_trilinearRepeatSampler;
-layout(set = 0, binding = 2) uniform texture2DArray u_tex;
+layout(set = 0, binding = 2) uniform ANKI_RP texture2DArray u_tex;
 
 layout(location = 0) in Vec3 in_uv;
-layout(location = 1) flat in Vec4 in_color;
+layout(location = 1) flat in ANKI_RP Vec4 in_color;
 
-layout(location = 0) out Vec4 out_color;
+layout(location = 0) out ANKI_RP Vec4 out_color;
 
 void main()
 {
-	const Vec4 col = texture(u_tex, u_trilinearRepeatSampler, in_uv);
+	const ANKI_RP Vec4 col = texture(u_tex, u_trilinearRepeatSampler, in_uv);
 	out_color = col * in_color;
 }
 #pragma anki end

+ 76 - 58
AnKi/Shaders/LightFunctions.glsl

@@ -78,75 +78,88 @@ Vec3 F_Unreal(Vec3 specular, F32 VoH)
 
 // Fresnel Schlick: "An Inexpensive BRDF Model for Physically-Based Rendering"
 // It uses fewer VGPRs than F_Unreal
-// specular: The specular color aka F0
-Vec3 F_Schlick(Vec3 specular, F32 VoH)
+ANKI_RP Vec3 F_Schlick(ANKI_RP Vec3 f0, ANKI_RP F32 VoH)
 {
-	const F32 a = 1.0 - VoH;
-	const F32 a2 = a * a;
-	const F32 a5 = a2 * a2 * a; // a5 = a^5
-	return /*saturate(50.0 * specular.g) */ a5 + (1.0 - a5) * specular;
+	const ANKI_RP F32 f = pow(1.0 - VoH, 5.0);
+	return f + f0 * (1.0 - f);
 }
 
 // D(n,h) aka NDF: GGX Trowbridge-Reitz
-F32 D_GGX(F32 roughness, F32 NoH)
+ANKI_RP F32 D_GGX(ANKI_RP F32 roughness, ANKI_RP F32 NoH, ANKI_RP Vec3 h, ANKI_RP Vec3 worldNormal)
 {
-	const F32 a = roughness * roughness;
-	const F32 a2 = a * a;
+#if 0 && ANKI_OS_ANDROID
+	const ANKI_RP Vec3 NxH = cross(worldNormal, h);
+	const ANKI_RP F32 oneMinusNoHSquared = dot(NxH, NxH);
+#else
+	const ANKI_RP F32 oneMinusNoHSquared = 1.0 - NoH * NoH;
+#endif
 
-	const F32 D = (NoH * a2 - NoH) * NoH + 1.0;
-	return a2 / (PI * D * D);
+	const ANKI_RP F32 a = roughness * roughness;
+	const ANKI_RP F32 v = NoH * a;
+	const ANKI_RP F32 k = a / (oneMinusNoHSquared + v * v);
+	const ANKI_RP F32 d = k * k * (1.0 / PI);
+	return saturateRp(d);
 }
 
 // Visibility term: Geometric shadowing divided by BRDF denominator
-F32 V_Schlick(F32 roughness, F32 NoV, F32 NoL)
+ANKI_RP F32 V_Schlick(ANKI_RP F32 roughness, ANKI_RP F32 NoV, ANKI_RP F32 NoL)
 {
-	const F32 k = (roughness * roughness) * 0.5;
-	const F32 Vis_SchlickV = NoV * (1.0 - k) + k;
-	const F32 Vis_SchlickL = NoL * (1.0 - k) + k;
+	const ANKI_RP F32 k = (roughness * roughness) * 0.5;
+	const ANKI_RP F32 Vis_SchlickV = NoV * (1.0 - k) + k;
+	const ANKI_RP F32 Vis_SchlickL = NoL * (1.0 - k) + k;
 	return 0.25 / (Vis_SchlickV * Vis_SchlickL);
 }
 
-Vec3 envBRDF(Vec3 specular, F32 roughness, texture2D integrationLut, sampler integrationLutSampler, F32 NoV)
+// Visibility term: Hammon 2017, "PBR Diffuse Lighting for GGX+Smith Microsurfaces"
+ANKI_RP F32 V_SmithGGXCorrelatedFast(ANKI_RP F32 roughness, ANKI_RP F32 NoV, ANKI_RP F32 NoL)
 {
-	const Vec2 envBRDF = textureLod(integrationLut, integrationLutSampler, Vec2(roughness, NoV), 0.0).xy;
-	return specular * envBRDF.x + min(1.0, 50.0 * specular.g) * envBRDF.y;
+	const ANKI_RP F32 a = roughness * roughness;
+	const ANKI_RP F32 v = 0.5 / mix(2.0 * NoL * NoV, NoL + NoV, a);
+	return saturateRp(v);
+}
+
+ANKI_RP F32 Fd_Lambert()
+{
+	return 1.0 / PI;
 }
 
-Vec3 diffuseLambert(Vec3 diffuse)
+ANKI_RP Vec3 diffuseLobe(ANKI_RP Vec3 diffuse)
 {
-	return diffuse * (1.0 / PI);
+	return diffuse * Fd_Lambert();
 }
 
 // Performs BRDF specular lighting
-Vec3 computeSpecularColorBrdf(GbufferInfo gbuffer, Vec3 viewDir, Vec3 frag2Light)
+ANKI_RP Vec3 specularIsotropicLobe(GbufferInfo gbuffer, Vec3 viewDir, Vec3 frag2Light)
 {
-	const Vec3 H = normalize(frag2Light + viewDir);
+	const ANKI_RP Vec3 H = normalize(frag2Light + viewDir);
 
-	const F32 NoL = max(EPSILON, dot(gbuffer.m_normal, frag2Light));
-	const F32 VoH = max(EPSILON, dot(viewDir, H));
-	const F32 NoH = max(EPSILON, dot(gbuffer.m_normal, H));
-	const F32 NoV = max(EPSILON, dot(gbuffer.m_normal, viewDir));
+	const ANKI_RP F32 NoL = max(0.0, dot(gbuffer.m_normal, frag2Light));
+	const ANKI_RP F32 VoH = max(0.0, dot(viewDir, H));
+	const ANKI_RP F32 NoH = max(0.0, dot(gbuffer.m_normal, H));
+	const ANKI_RP F32 NoV = max(0.05, dot(gbuffer.m_normal, viewDir));
 
 	// F
-#if 0
-	const Vec3 F = F_Unreal(gbuffer.m_specular, VoH);
-#else
-	const Vec3 F = F_Schlick(gbuffer.m_specular, VoH);
-#endif
+	const ANKI_RP Vec3 F = F_Schlick(gbuffer.m_f0, VoH);
 
 	// D
-	const F32 D = D_GGX(gbuffer.m_roughness, NoH);
+	const ANKI_RP F32 D = D_GGX(gbuffer.m_roughness, NoH, H, gbuffer.m_normal);
 
 	// Vis
-	const F32 V = V_Schlick(gbuffer.m_roughness, NoV, NoL);
+	const ANKI_RP F32 V = V_SmithGGXCorrelatedFast(gbuffer.m_roughness, NoV, NoL);
 
 	return F * (V * D);
 }
 
-F32 computeSpotFactor(Vec3 l, F32 outerCos, F32 innerCos, Vec3 spotDir)
+Vec3 envBRDF(Vec3 specular, F32 roughness, texture2D integrationLut, sampler integrationLutSampler, F32 NoV)
+{
+	const Vec2 envBRDF = textureLod(integrationLut, integrationLutSampler, Vec2(roughness, NoV), 0.0).xy;
+	return specular * envBRDF.x + min(1.0, 50.0 * specular.g) * envBRDF.y;
+}
+
+ANKI_RP F32 computeSpotFactor(ANKI_RP Vec3 l, ANKI_RP F32 outerCos, ANKI_RP F32 innerCos, ANKI_RP Vec3 spotDir)
 {
-	const F32 costheta = -dot(l, spotDir);
-	const F32 spotFactor = smoothstep(outerCos, innerCos, costheta);
+	const ANKI_RP F32 costheta = -dot(l, spotDir);
+	const ANKI_RP F32 spotFactor = smoothstep(outerCos, innerCos, costheta);
 	return spotFactor;
 }
 
@@ -162,7 +175,7 @@ U32 computeShadowSampleCount(const U32 COUNT, F32 zVSpace)
 	return sampleCount;
 }
 
-F32 computeShadowFactorSpotLight(SpotLight light, Vec3 worldPos, texture2D spotMap, sampler spotMapSampler)
+ANKI_RP F32 computeShadowFactorSpotLight(SpotLight light, Vec3 worldPos, texture2D spotMap, sampler spotMapSampler)
 {
 	const Vec4 texCoords4 = light.m_textureMatrix * Vec4(worldPos, 1.0);
 	const Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;
@@ -173,7 +186,8 @@ F32 computeShadowFactorSpotLight(SpotLight light, Vec3 worldPos, texture2D spotM
 }
 
 // Compute the shadow factor of point (omni) lights.
-F32 computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, texture2D shadowMap, sampler shadowMapSampler)
+ANKI_RP F32 computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, texture2D shadowMap,
+										  sampler shadowMapSampler)
 {
 	const Vec3 dir = -frag2Light;
 	const Vec3 dirabs = abs(dir);
@@ -201,21 +215,22 @@ F32 computeShadowFactorPointLight(PointLight light, Vec3 frag2Light, texture2D s
 	const Vec2 atlasOffset = light.m_shadowAtlasTileOffsets[faceIdxu];
 
 	// Compute UV
-	uv = fma(uv, Vec2(light.m_shadowAtlasTileScale), atlasOffset);
+	uv *= Vec2(light.m_shadowAtlasTileScale);
+	uv += atlasOffset;
 
 	// Sample
 	const Vec4 shadowMoments = textureLod(shadowMap, shadowMapSampler, uv, 0.0);
 
 	// 3) Compare
 	//
-	const F32 shadowFactor = evsmComputeShadowFactor(z, shadowMoments);
+	const ANKI_RP F32 shadowFactor = evsmComputeShadowFactor(z, shadowMoments);
 
 	return shadowFactor;
 }
 
 // Compute the shadow factor of a directional light
-F32 computeShadowFactorDirLight(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, texture2D shadowMap,
-								sampler shadowMapSampler)
+ANKI_RP F32 computeShadowFactorDirLight(DirectionalLight light, U32 cascadeIdx, Vec3 worldPos, texture2D shadowMap,
+										sampler shadowMapSampler)
 {
 #define ANKI_FAST_CASCADES_WORKAROUND 1 // Doesn't make sense but it's super fast
 
@@ -249,13 +264,13 @@ F32 computeShadowFactorDirLight(DirectionalLight light, U32 cascadeIdx, Vec3 wor
 }
 
 // Compute the shadow factor of a directional light
-F32 computeShadowFactorDirLight(Mat4 lightProjectionMat, Vec3 worldPos, texture2D shadowMap,
-								samplerShadow shadowMapSampler)
+ANKI_RP F32 computeShadowFactorDirLight(Mat4 lightProjectionMat, Vec3 worldPos, texture2D shadowMap,
+										samplerShadow shadowMapSampler)
 {
 	const Vec4 texCoords4 = lightProjectionMat * Vec4(worldPos, 1.0);
 	const Vec3 texCoords3 = texCoords4.xyz / texCoords4.w;
 
-	const F32 shadowFactor = textureLod(shadowMap, shadowMapSampler, texCoords3, 0.0);
+	const ANKI_RP F32 shadowFactor = textureLod(shadowMap, shadowMapSampler, texCoords3, 0.0);
 	return shadowFactor;
 }
 
@@ -287,10 +302,10 @@ Vec3 computeCubemapVecCheap(Vec3 r, F32 R2, Vec3 f)
 	return r;
 }
 
-F32 computeAttenuationFactor(F32 squareRadiusOverOne, Vec3 frag2Light)
+ANKI_RP F32 computeAttenuationFactor(ANKI_RP F32 squareRadiusOverOne, ANKI_RP Vec3 frag2Light)
 {
-	const F32 fragLightDist = dot(frag2Light, frag2Light);
-	F32 att = 1.0 - fragLightDist * squareRadiusOverOne;
+	const ANKI_RP F32 fragLightDist = dot(frag2Light, frag2Light);
+	ANKI_RP F32 att = 1.0 - fragLightDist * squareRadiusOverOne;
 	att = max(0.0, att);
 	return att * att;
 }
@@ -327,24 +342,26 @@ F32 computeProbeBlendWeight(Vec3 fragPos, // Doesn't need to be inside the AABB
 
 // Given the value of the 6 faces of the dice and a normal, sample the correct weighted value.
 // https://www.shadertoy.com/view/XtcBDB
-Vec3 sampleAmbientDice(Vec3 posx, Vec3 negx, Vec3 posy, Vec3 negy, Vec3 posz, Vec3 negz, Vec3 normal)
+ANKI_RP Vec3 sampleAmbientDice(ANKI_RP Vec3 posx, ANKI_RP Vec3 negx, ANKI_RP Vec3 posy, ANKI_RP Vec3 negy,
+							   ANKI_RP Vec3 posz, ANKI_RP Vec3 negz, ANKI_RP Vec3 normal)
 {
-	const Vec3 axisWeights = abs(normal);
-	const Vec3 uv = NDC_TO_UV(normal);
+	const ANKI_RP Vec3 axisWeights = abs(normal);
+	const ANKI_RP Vec3 uv = NDC_TO_UV(normal);
 
-	Vec3 col = mix(negx, posx, uv.x) * axisWeights.x;
+	ANKI_RP Vec3 col = mix(negx, posx, uv.x) * axisWeights.x;
 	col += mix(negy, posy, uv.y) * axisWeights.y;
 	col += mix(negz, posz, uv.z) * axisWeights.z;
 
 	// Divide by weight
-	col /= axisWeights.x + axisWeights.y + axisWeights.z + EPSILON;
+	col /= axisWeights.x + axisWeights.y + axisWeights.z + EPSILON_RP;
 
 	return col;
 }
 
 // Sample the irradiance term from the clipmap
-Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const GlobalIlluminationProbe probe,
-							  texture3D textures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES], sampler linearAnyClampSampler)
+ANKI_RP Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const GlobalIlluminationProbe probe,
+									  ANKI_RP texture3D textures[MAX_VISIBLE_GLOBAL_ILLUMINATION_PROBES],
+									  sampler linearAnyClampSampler)
 {
 	// Find the UVW
 	Vec3 uvw = (worldPos - probe.m_aabbMin) / (probe.m_aabbMax - probe.m_aabbMin);
@@ -356,7 +373,7 @@ Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const Glob
 	uvw.x = clamp(uvw.x, probe.m_halfTexelSizeU, (1.0 / 6.0) - probe.m_halfTexelSizeU);
 
 	// Read the irradiance
-	Vec3 irradiancePerDir[6u];
+	ANKI_RP Vec3 irradiancePerDir[6u];
 	ANKI_UNROLL for(U32 dir = 0u; dir < 6u; ++dir)
 	{
 		// Point to the correct UV
@@ -368,8 +385,9 @@ Vec3 sampleGlobalIllumination(const Vec3 worldPos, const Vec3 normal, const Glob
 	}
 
 	// Sample the irradiance
-	const Vec3 irradiance = sampleAmbientDice(irradiancePerDir[0], irradiancePerDir[1], irradiancePerDir[2],
-											  irradiancePerDir[3], irradiancePerDir[4], irradiancePerDir[5], normal);
+	const ANKI_RP Vec3 irradiance =
+		sampleAmbientDice(irradiancePerDir[0], irradiancePerDir[1], irradiancePerDir[2], irradiancePerDir[3],
+						  irradiancePerDir[4], irradiancePerDir[5], normal);
 
 	return irradiance;
 }

+ 25 - 22
AnKi/Shaders/LightShading.ankiprog

@@ -42,16 +42,16 @@ layout(set = 0, binding = 16) uniform texture2D u_resolvedSm;
 
 layout(location = 0) in Vec2 in_uv;
 
-layout(location = 0) out Vec3 out_color;
+layout(location = 0) out ANKI_RP Vec3 out_color;
 
 // Common code for lighting
 #define LIGHTING_COMMON_BRDF() \
-	const Vec3 frag2Light = light.m_position - worldPos; \
-	const Vec3 l = normalize(frag2Light); \
-	const Vec3 specC = computeSpecularColorBrdf(gbuffer, viewDir, l); \
-	const Vec3 diffC = diffuseLambert(gbuffer.m_diffuse); \
-	const F32 att = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light); \
-	F32 lambert = max(0.0, dot(gbuffer.m_normal, l));
+	const ANKI_RP Vec3 frag2Light = light.m_position - worldPos; \
+	const ANKI_RP Vec3 l = normalize(frag2Light); \
+	const ANKI_RP Vec3 specC = specularIsotropicLobe(gbuffer, viewDir, l); \
+	const ANKI_RP Vec3 diffC = diffuseLobe(gbuffer.m_diffuse); \
+	const ANKI_RP F32 att = computeAttenuationFactor(light.m_squareRadiusOverOne, frag2Light); \
+	ANKI_RP F32 lambert = max(0.0, dot(gbuffer.m_normal, l));
 
 void main()
 {
@@ -76,15 +76,17 @@ void main()
 
 	// Decode GBuffer
 	GbufferInfo gbuffer;
-	readGBuffer(u_msRt0, u_msRt1, u_msRt2, u_nearestAnyClampSampler, in_uv, 0.0, gbuffer);
+	unpackGBufferNoVelocity(textureLod(u_msRt0, u_nearestAnyClampSampler, in_uv, 0.0),
+							textureLod(u_msRt1, u_nearestAnyClampSampler, in_uv, 0.0),
+							textureLod(u_msRt2, u_nearestAnyClampSampler, in_uv, 0.0), gbuffer);
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, SUBSURFACE_MIN);
 
 	// SM
 #if USE_SHADOW_LAYERS
-	F32 resolvedSm[MAX_RT_SHADOW_LAYERS];
+	ANKI_RP F32 resolvedSm[MAX_RT_SHADOW_LAYERS];
 	unpackRtShadows(textureLod(u_shadowLayersTex, u_nearestAnyClampSampler, in_uv, 0.0), resolvedSm);
 #else
-	Vec4 resolvedSm = textureLod(u_resolvedSm, u_trilinearClampSampler, in_uv, 0.0);
+	ANKI_RP Vec4 resolvedSm = textureLod(u_resolvedSm, u_trilinearClampSampler, in_uv, 0.0);
 	U32 resolvedSmIdx = 0u;
 #endif
 
@@ -92,11 +94,11 @@ void main()
 	out_color = gbuffer.m_diffuse * gbuffer.m_emission;
 
 	// Dir light
-	const Vec3 viewDir = normalize(u_clusteredShading.m_cameraPosition - worldPos);
+	const ANKI_RP Vec3 viewDir = normalize(u_clusteredShading.m_cameraPosition - worldPos);
 	const DirectionalLight dirLight = u_clusteredShading.m_directionalLight;
 	if(dirLight.m_active != 0u)
 	{
-		F32 shadowFactor;
+		ANKI_RP F32 shadowFactor;
 		if(dirLight.m_cascadeCount > 0u)
 		{
 #if USE_SHADOW_LAYERS
@@ -111,12 +113,12 @@ void main()
 			shadowFactor = 1.0;
 		}
 
-		const Vec3 l = -dirLight.m_direction;
+		const ANKI_RP Vec3 l = -dirLight.m_direction;
 
-		const F32 lambert = max(gbuffer.m_subsurface, dot(l, gbuffer.m_normal));
+		const ANKI_RP F32 lambert = max(gbuffer.m_subsurface, dot(l, gbuffer.m_normal));
 
-		const Vec3 diffC = diffuseLambert(gbuffer.m_diffuse);
-		const Vec3 specC = computeSpecularColorBrdf(gbuffer, viewDir, l);
+		const ANKI_RP Vec3 diffC = diffuseLobe(gbuffer.m_diffuse);
+		const ANKI_RP Vec3 specC = specularIsotropicLobe(gbuffer, viewDir, l);
 
 		out_color += (diffC + specC) * dirLight.m_diffuseColor * (shadowFactor * lambert);
 	}
@@ -133,9 +135,9 @@ void main()
 		ANKI_BRANCH if(light.m_shadowAtlasTileScale >= 0.0)
 		{
 #if USE_SHADOW_LAYERS
-			const F32 shadow = resolvedSm[light.m_shadowLayer];
+			const ANKI_RP F32 shadow = resolvedSm[light.m_shadowLayer];
 #else
-			const F32 shadow = resolvedSm[resolvedSmIdx++];
+			const ANKI_RP F32 shadow = resolvedSm[resolvedSmIdx++];
 #endif
 			lambert *= shadow;
 		}
@@ -157,9 +159,9 @@ void main()
 		ANKI_BRANCH if(light.m_shadowLayer != MAX_U32)
 		{
 #if USE_SHADOW_LAYERS
-			const F32 shadow = resolvedSm[light.m_shadowLayer];
+			const ANKI_RP F32 shadow = resolvedSm[light.m_shadowLayer];
 #else
-			const F32 shadow = resolvedSm[resolvedSmIdx++];
+			const ANKI_RP F32 shadow = resolvedSm[resolvedSmIdx++];
 #endif
 			lambert *= shadow;
 		}
@@ -223,10 +225,11 @@ void main()
 
 		// Compute env BRDF
 		const F32 NoV = max(EPSILON, dot(gbuffer.m_normal, viewDir));
-		const Vec3 env =
-			envBRDF(gbuffer.m_specular, gbuffer.m_roughness, u_integrationLut, u_trilinearClampSampler, NoV);
+		const Vec3 env = envBRDF(gbuffer.m_f0, gbuffer.m_roughness, u_integrationLut, u_trilinearClampSampler, NoV);
 
 		out_color += finalSpecIndirect * env;
 	}
+
+	out_color = saturateRp(out_color);
 }
 #pragma anki end

+ 4 - 4
AnKi/Shaders/LightShadingApplyFog.ankiprog

@@ -15,7 +15,7 @@ ANKI_SPECIALIZATION_CONSTANT_U32(FINAL_Z_SPLIT, 1u);
 #include <AnKi/Shaders/Functions.glsl>
 
 layout(location = 0) in Vec2 in_uv;
-layout(location = 0) out Vec4 out_color;
+layout(location = 0) out ANKI_RP Vec4 out_color;
 
 layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
 layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
@@ -42,9 +42,9 @@ void main()
 	uvw.xy = in_uv;
 
 	// Read the volume
-	const Vec4 fogVals = textureLod(u_fogVolume, u_linearAnyClampSampler, uvw, 0.0);
-	const Vec3 inScattering = fogVals.rgb;
-	const F32 transmittance = fogVals.a;
+	const ANKI_RP Vec4 fogVals = textureLod(u_fogVolume, u_linearAnyClampSampler, uvw, 0.0);
+	const ANKI_RP Vec3 inScattering = fogVals.rgb;
+	const ANKI_RP F32 transmittance = fogVals.a;
 
 	// Apply the fog
 	out_color = Vec4(inScattering, transmittance);

+ 9 - 5
AnKi/Shaders/LightShadingApplyIndirect.ankiprog

@@ -9,13 +9,14 @@
 
 #pragma anki start frag
 #include <AnKi/Shaders/BilateralFilter.glsl>
+#include <AnKi/Shaders/PackFunctions.glsl>
 
 layout(set = 0, binding = 0) uniform sampler u_nearestAnyClampSampler;
 layout(set = 0, binding = 1) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 2) uniform texture2D u_quarterDiffuseIndirectTex;
+layout(set = 0, binding = 2) uniform ANKI_RP texture2D u_quarterDiffuseIndirectTex;
 layout(set = 0, binding = 3) uniform texture2D u_quarterDepthTex;
 layout(set = 0, binding = 4) uniform texture2D u_fullDepthTex;
-layout(set = 0, binding = 5) uniform texture2D u_gbuffer0Tex;
+layout(set = 0, binding = 5) uniform ANKI_RP texture2D u_gbuffer0Tex;
 
 layout(push_constant, std430) uniform b_pc
 {
@@ -24,7 +25,7 @@ layout(push_constant, std430) uniform b_pc
 };
 
 layout(location = 0) in Vec2 in_uv;
-layout(location = 0) out Vec3 out_color;
+layout(location = 0) out ANKI_RP Vec3 out_color;
 
 void main()
 {
@@ -45,14 +46,17 @@ void main()
 			const F32 w = calculateBilateralWeightDepth(depthCenter, depthTap, 1.0);
 			sumWeight += w;
 
-			const Vec3 colorTap = textureLod(u_quarterDiffuseIndirectTex, u_nearestAnyClampSampler, sampleUv, 0.0).xyz;
+			const ANKI_RP Vec3 colorTap =
+				textureLod(u_quarterDiffuseIndirectTex, u_nearestAnyClampSampler, sampleUv, 0.0).xyz;
 			out_color += colorTap * w;
 		}
 	}
 
 	// Modulate
 	out_color /= sumWeight;
-	const Vec3 albedo = textureLod(u_gbuffer0Tex, u_linearAnyClampSampler, in_uv, 0.0).xyz;
+	const ANKI_RP F32 metallic = 0.0;
+	const ANKI_RP Vec3 albedo =
+		unpackDiffuseFromGBuffer(textureLod(u_gbuffer0Tex, u_linearAnyClampSampler, in_uv, 0.0), metallic);
 	out_color *= albedo;
 }
 #pragma anki end

+ 5 - 6
AnKi/Shaders/MotionBlur.glsl

@@ -8,8 +8,8 @@
 #include <AnKi/Shaders/Common.glsl>
 
 // Perform motion blur.
-Vec3 motionBlur(texture2D motionVectorsRt, sampler motionVectorsRtSampler, texture2D toBlurRt, Vec2 toBlurRtSize,
-				sampler toBlurRtSampler, Vec2 uv, U32 maxSamples)
+ANKI_RP Vec3 motionBlur(texture2D motionVectorsRt, sampler motionVectorsRtSampler, ANKI_RP texture2D toBlurRt,
+						Vec2 toBlurRtSize, sampler toBlurRtSampler, Vec2 uv, U32 maxSamples)
 {
 	// Compute velocity. Get the max velocity around the current sample to avoid outlines. TAA's result and the motion
 	// vectors RT do not quite overlap
@@ -44,16 +44,15 @@ Vec3 motionBlur(texture2D motionVectorsRt, sampler motionVectorsRtSampler, textu
 	sampleCountf = round(sampleCountf);
 
 	// Sample
-	Vec3 outColor = textureLod(toBlurRt, toBlurRtSampler, uv, 0.0).rgb;
+	const ANKI_RP F32 weight = 1.0 / sampleCountf;
+	ANKI_RP Vec3 outColor = textureLod(toBlurRt, toBlurRtSampler, uv, 0.0).rgb * weight;
 	ANKI_LOOP for(F32 s = 1.0; s < sampleCountf; s += 1.0)
 	{
 		const F32 f = s / sampleCountf;
 		const Vec2 sampleUv = uv + velocity * f;
 
-		outColor += textureLod(toBlurRt, toBlurRtSampler, sampleUv, 0.0).rgb;
+		outColor += textureLod(toBlurRt, toBlurRtSampler, sampleUv, 0.0).rgb * weight;
 	}
 
-	outColor /= sampleCountf;
-
 	return outColor;
 }

+ 32 - 39
AnKi/Shaders/PackFunctions.glsl

@@ -41,7 +41,7 @@ Vec3 signedOctEncode(Vec3 n)
 	outn.x = n.x * 0.5 + outn.y;
 	outn.y = n.x * -0.5 + outn.y;
 
-	outn.z = saturate(n.z * FLT_MAX);
+	outn.z = saturate(n.z * MAX_F32);
 	return outn;
 }
 
@@ -122,33 +122,34 @@ Vec2 unpackUnorm1ToUnorm2(F32 c)
 #endif
 }
 
-const F32 ABSOLUTE_MAX_EMISSION = 1024.0;
+const ANKI_RP F32 ABSOLUTE_MAX_EMISSION = 1024.0;
 #if !defined(MAX_EMISSION)
-const F32 MAX_EMISSION = 30.0; // Max emission. Keep as low as possible and less than ABSOLUTE_MAX_EMISSION
+const ANKI_RP F32 MAX_EMISSION = 30.0; // Max emission. Keep as low as possible and less than ABSOLUTE_MAX_EMISSION
 #endif
 // Round the MAX_EMISSION to fit a U8_UNORM
-const F32 FIXED_MAX_EMISSION = F32(U32(MAX_EMISSION / ABSOLUTE_MAX_EMISSION * 255.0)) / 255.0 * ABSOLUTE_MAX_EMISSION;
+const ANKI_RP F32 FIXED_MAX_EMISSION =
+	F32(U32(MAX_EMISSION / ABSOLUTE_MAX_EMISSION * 255.0)) / 255.0 * ABSOLUTE_MAX_EMISSION;
 
-const F32 MIN_ROUGHNESS = 0.05;
+const ANKI_RP F32 MIN_ROUGHNESS = 0.05;
 
 // G-Buffer structure
 struct GbufferInfo
 {
-	Vec3 m_diffuse;
-	Vec3 m_specular;
-	Vec3 m_normal;
-	F32 m_roughness;
-	F32 m_metallic;
-	F32 m_subsurface;
-	F32 m_emission;
+	ANKI_RP Vec3 m_diffuse;
+	ANKI_RP Vec3 m_f0; ///< Fresnel reflectance at zero angle (normal incidence).
+	ANKI_RP Vec3 m_normal;
+	ANKI_RP F32 m_roughness;
+	ANKI_RP F32 m_metallic;
+	ANKI_RP F32 m_subsurface;
+	ANKI_RP F32 m_emission;
 	Vec2 m_velocity;
 };
 
 // Populate the G buffer
-void writeGBuffer(GbufferInfo g, out Vec4 rt0, out Vec4 rt1, out Vec4 rt2, out Vec2 rt3)
+void packGBuffer(GbufferInfo g, out Vec4 rt0, out Vec4 rt1, out Vec4 rt2, out Vec2 rt3)
 {
 	rt0 = Vec4(g.m_diffuse, g.m_subsurface);
-	rt1 = Vec4(g.m_roughness, g.m_metallic, g.m_specular.x, FIXED_MAX_EMISSION / ABSOLUTE_MAX_EMISSION);
+	rt1 = Vec4(g.m_roughness, g.m_metallic, g.m_f0.x, FIXED_MAX_EMISSION / ABSOLUTE_MAX_EMISSION);
 
 	const Vec3 encNorm = signedOctEncode(g.m_normal);
 	rt2 = Vec4(g.m_emission / FIXED_MAX_EMISSION, encNorm);
@@ -156,49 +157,41 @@ void writeGBuffer(GbufferInfo g, out Vec4 rt0, out Vec4 rt1, out Vec4 rt2, out V
 	rt3 = g.m_velocity;
 }
 
-Vec3 unpackNormalFromGBuffer(Vec4 gbuffer)
+ANKI_RP Vec3 unpackDiffuseFromGBuffer(ANKI_RP Vec4 rt0, ANKI_RP F32 metallic)
 {
-	return signedOctDecode(gbuffer.gba);
+	return rt0.xyz *= 1.0 - metallic;
 }
 
-// Read from G-buffer
-Vec3 readNormalFromGBuffer(texture2D rt2, sampler sampl, Vec2 uv)
+ANKI_RP Vec3 unpackNormalFromGBuffer(ANKI_RP Vec4 rt2)
 {
-	return unpackNormalFromGBuffer(textureLod(rt2, sampl, uv, 0.0));
+	return signedOctDecode(rt2.gba);
 }
 
-// Read the roughness from G-buffer
-F32 readRoughnessFromGBuffer(texture2D rt1, sampler sampl, Vec2 uv)
+ANKI_RP F32 unpackRoughnessFromGBuffer(ANKI_RP Vec4 rt1)
 {
-	F32 r = textureLod(rt1, sampl, uv, 0.0).r;
+	ANKI_RP F32 r = rt1.x;
 	r = r * (1.0 - MIN_ROUGHNESS) + MIN_ROUGHNESS;
 	return r;
 }
 
 // Read part of the G-buffer
-void readGBuffer(texture2D rt0, texture2D rt1, texture2D rt2, sampler sampl, Vec2 uv, F32 lod, out GbufferInfo g)
+void unpackGBufferNoVelocity(ANKI_RP Vec4 rt0, ANKI_RP Vec4 rt1, ANKI_RP Vec4 rt2, out GbufferInfo g)
 {
-	Vec4 comp = textureLod(rt0, sampl, uv, 0.0);
-	g.m_diffuse = comp.xyz;
-	g.m_subsurface = comp.w;
+	g.m_diffuse = rt0.xyz;
+	g.m_subsurface = rt0.w;
 
-	comp = textureLod(rt1, sampl, uv, 0.0);
-	g.m_roughness = comp.x;
-	g.m_metallic = comp.y;
-	g.m_specular = Vec3(comp.z);
-	const F32 maxEmission = comp.w * ABSOLUTE_MAX_EMISSION;
+	g.m_roughness = unpackRoughnessFromGBuffer(rt1);
+	g.m_metallic = rt1.y;
+	g.m_f0 = Vec3(rt1.z);
+	const ANKI_RP F32 maxEmission = rt1.w * ABSOLUTE_MAX_EMISSION;
 
-	comp = textureLod(rt2, sampl, uv, 0.0);
-	g.m_normal = signedOctDecode(comp.yzw);
-	g.m_emission = comp.x * maxEmission;
+	g.m_normal = signedOctDecode(rt2.yzw);
+	g.m_emission = rt2.x * maxEmission;
 
-	g.m_velocity = Vec2(FLT_MAX); // Put something random
-
-	// Fix roughness
-	g.m_roughness = g.m_roughness * (1.0 - MIN_ROUGHNESS) + MIN_ROUGHNESS;
+	g.m_velocity = Vec2(MAX_F32); // Put something random
 
 	// Compute reflectance
-	g.m_specular = mix(g.m_specular, g.m_diffuse, g.m_metallic);
+	g.m_f0 = mix(g.m_f0, g.m_diffuse, g.m_metallic);
 
 	// Compute diffuse
 	g.m_diffuse *= 1.0 - g.m_metallic;

+ 1 - 1
AnKi/Shaders/RtShadowsDenoise.ankiprog

@@ -78,7 +78,7 @@ void main()
 	}
 
 	const Vec3 positionCenter = unproject(UV_TO_NDC(uv), depthCenter);
-	const Vec3 normalCenter = readNormalFromGBuffer(u_gbuffer2Tex, u_linearAnyClampSampler, uv);
+	const Vec3 normalCenter = unpackNormalFromGBuffer(textureLod(u_gbuffer2Tex, u_linearAnyClampSampler, uv, 0.0));
 
 	F32 shadowFactors[MAX_RT_SHADOW_LAYERS];
 	unpackRtShadows(textureLod(u_inTex, u_nearestAnyClampSampler, uv, 0.0), shadowFactors);

+ 1 - 1
AnKi/Shaders/RtShadowsRayGen.ankiprog

@@ -84,7 +84,7 @@ void main()
 	}
 
 	// World normal
-	const Vec3 normal = readNormalFromGBuffer(u_normalRt, u_linearAnyClampSampler, uv);
+	const Vec3 normal = unpackNormalFromGBuffer(textureLod(u_normalRt, u_linearAnyClampSampler, uv, 0.0));
 
 	// Cluster
 	Cluster cluster = getClusterFragCoord(Vec3(uv * u_clusteredShading.m_renderingSize, depth));

+ 7 - 7
AnKi/Shaders/ShadowmapsResolve.ankiprog

@@ -19,7 +19,7 @@ ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 5u);
 const UVec2 WORKGROUP_SIZE = UVec2(8, 8);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
 
-layout(set = 0, binding = 5, rgba8) writeonly uniform image2D u_outImg;
+layout(set = 0, binding = 5, rgba8) writeonly uniform ANKI_RP image2D u_outImg;
 layout(set = 0, binding = 6) uniform sampler u_linearAnyClampSampler;
 layout(set = 0, binding = 7) uniform texture2D u_depthRt;
 
@@ -45,17 +45,17 @@ void main()
 	// Layers
 	U32 shadowCasterCountPerFragment = 0u;
 	const U32 maxShadowCastersPerFragment = 4u;
-	F32 shadowFactors[maxShadowCastersPerFragment] = F32[](0.0, 0.0, 0.0, 0.0);
+	ANKI_RP F32 shadowFactors[maxShadowCastersPerFragment] = F32[](0.0, 0.0, 0.0, 0.0);
 
 	// Dir light
 	const DirectionalLight dirLight = u_clusteredShading.m_directionalLight;
 	if(dirLight.m_active != 0u && dirLight.m_cascadeCount > 0u)
 	{
-		const F32 positiveZViewSpace =
+		const ANKI_RP F32 positiveZViewSpace =
 			testPlanePoint(u_clusteredShading.m_nearPlaneWSpace.xyz, u_clusteredShading.m_nearPlaneWSpace.w, worldPos)
 			+ u_clusteredShading.m_near;
 
-		F32 shadowFactor;
+		ANKI_RP F32 shadowFactor;
 		if(positiveZViewSpace < dirLight.m_effectiveShadowDistance)
 		{
 			const U32 cascadeIdx =
@@ -65,7 +65,7 @@ void main()
 			shadowFactor =
 				computeShadowFactorDirLight(dirLight, cascadeIdx, worldPos, u_shadowAtlasTex, u_linearAnyClampSampler);
 
-			F32 distanceFadeFactor = saturate(positiveZViewSpace / dirLight.m_effectiveShadowDistance);
+			ANKI_RP F32 distanceFadeFactor = saturate(positiveZViewSpace / dirLight.m_effectiveShadowDistance);
 			distanceFadeFactor = pow(distanceFadeFactor, 8.0);
 			shadowFactor += distanceFadeFactor;
 		}
@@ -89,7 +89,7 @@ void main()
 		{
 			const Vec3 frag2Light = light.m_position - worldPos;
 
-			const F32 shadowFactor =
+			const ANKI_RP F32 shadowFactor =
 				computeShadowFactorPointLight(light, frag2Light, u_shadowAtlasTex, u_linearAnyClampSampler);
 			shadowFactors[min(maxShadowCastersPerFragment - 1u, shadowCasterCountPerFragment++)] = shadowFactor;
 		}
@@ -104,7 +104,7 @@ void main()
 
 		ANKI_BRANCH if(light.m_shadowLayer != MAX_U32)
 		{
-			const F32 shadowFactor =
+			const ANKI_RP F32 shadowFactor =
 				computeShadowFactorSpotLight(light, worldPos, u_shadowAtlasTex, u_linearAnyClampSampler);
 			shadowFactors[min(maxShadowCastersPerFragment - 1u, shadowCasterCountPerFragment++)] = shadowFactor;
 		}

+ 5 - 4
AnKi/Shaders/Ssr.ankiprog

@@ -60,8 +60,8 @@ void main()
 	const Vec2 uv = (Vec2(fixedGlobalInvocationId.xy) + 0.5) / Vec2(u_unis.m_framebufferSize);
 
 	// Read part of the G-buffer
-	const F32 roughness = readRoughnessFromGBuffer(u_gbufferRt1, u_trilinearClampSampler, uv);
-	const Vec3 worldNormal = readNormalFromGBuffer(u_gbufferRt2, u_trilinearClampSampler, uv);
+	const F32 roughness = unpackRoughnessFromGBuffer(textureLod(u_gbufferRt1, u_trilinearClampSampler, uv, 0.0));
+	const Vec3 worldNormal = unpackNormalFromGBuffer(textureLod(u_gbufferRt2, u_trilinearClampSampler, uv, 0.0));
 
 	// Get depth
 	const F32 depth = textureLod(u_depthRt, u_trilinearClampSampler, uv, 0.0).r;
@@ -96,7 +96,8 @@ void main()
 	ANKI_BRANCH if(hitAttenuation > 0.0)
 	{
 		const Vec3 hitNormal =
-			u_unis.m_normalMat * readNormalFromGBuffer(u_gbufferRt2, u_trilinearClampSampler, hitPoint.xy);
+			u_unis.m_normalMat
+			* unpackNormalFromGBuffer(textureLod(u_gbufferRt2, u_trilinearClampSampler, hitPoint.xy, 0.0));
 		F32 backFaceAttenuation;
 		rejectBackFaces(reflVec, hitNormal, backFaceAttenuation);
 
@@ -133,7 +134,7 @@ void main()
 
 		// Read the light buffer
 		outColor.rgb = textureLod(u_lightBufferRt, u_trilinearClampSampler, hitPoint.xy, lod).rgb;
-		outColor.rgb = clamp(outColor.rgb, 0.0, FLT_MAX); // Fix the value just in case
+		outColor.rgb = clamp(outColor.rgb, 0.0, MAX_F32); // Fix the value just in case
 		outColor.rgb *= hitAttenuation;
 		outColor.a = 1.0 - hitAttenuation;
 	}

+ 1 - 1
AnKi/Shaders/TonemappingAverageLuminance.ankiprog

@@ -90,7 +90,7 @@ void main()
 #endif
 
 		// This is a workaround because sometimes the avg lum becomes nan
-		finalAvgLum = clamp(finalAvgLum, EPSILON, FLT_MAX);
+		finalAvgLum = clamp(finalAvgLum, EPSILON, MAX_F32);
 
 		u_averageLuminance = finalAvgLum;
 		u_exposureThreshold0 = computeExposure(u_averageLuminance, 0.0);

+ 24 - 24
AnKi/Shaders/TonemappingFunctions.glsl

@@ -8,41 +8,41 @@
 #include <AnKi/Shaders/Common.glsl>
 
 // A trick to compute the base-10 logarithm
-F32 log10(F32 x)
+ANKI_RP F32 log10(ANKI_RP F32 x)
 {
 	return log(x) / log(10.0);
 }
 
-F32 computeLuminance(Vec3 color)
+ANKI_RP F32 computeLuminance(ANKI_RP Vec3 color)
 {
-	return max(dot(Vec3(0.30, 0.59, 0.11), color), EPSILON);
+	return max(dot(Vec3(0.30, 0.59, 0.11), color), EPSILON_RP);
 }
 
-F32 computeExposure(F32 avgLum, F32 threshold)
+ANKI_RP F32 computeExposure(ANKI_RP F32 avgLum, ANKI_RP F32 threshold)
 {
-	const F32 keyValue = 1.03 - (2.0 / (2.0 + log10(avgLum + 1.0)));
-	const F32 linearExposure = (keyValue / avgLum);
-	F32 exposure = log2(linearExposure);
+	const ANKI_RP F32 keyValue = 1.03 - (2.0 / (2.0 + log10(avgLum + 1.0)));
+	const ANKI_RP F32 linearExposure = (keyValue / avgLum);
+	ANKI_RP F32 exposure = log2(linearExposure);
 
 	exposure -= threshold;
 	return exp2(exposure);
 }
 
-Vec3 computeExposedColor(Vec3 color, F32 avgLum, F32 threshold)
+ANKI_RP Vec3 computeExposedColor(ANKI_RP Vec3 color, ANKI_RP F32 avgLum, ANKI_RP F32 threshold)
 {
 	return computeExposure(avgLum, threshold) * color;
 }
 
 // Reinhard operator
-Vec3 tonemapReinhard(Vec3 color, F32 saturation)
+ANKI_RP Vec3 tonemapReinhard(ANKI_RP Vec3 color, ANKI_RP F32 saturation)
 {
-	const F32 lum = computeLuminance(color);
-	const F32 toneMappedLuminance = lum / (lum + 1.0);
+	const ANKI_RP F32 lum = computeLuminance(color);
+	const ANKI_RP F32 toneMappedLuminance = lum / (lum + 1.0);
 	return toneMappedLuminance * pow(color / lum, Vec3(saturation));
 }
 
 // Uncharted 2 operator
-Vec3 tonemapUncharted2(Vec3 color)
+ANKI_RP Vec3 tonemapUncharted2(ANKI_RP Vec3 color)
 {
 	const F32 A = 0.15;
 	const F32 B = 0.50;
@@ -54,42 +54,42 @@ Vec3 tonemapUncharted2(Vec3 color)
 	return ((color * (A * color + C * B) + D * E) / (color * (A * color + B) + D * F)) - E / F;
 }
 
-Vec3 tonemapACESFilm(Vec3 x)
+ANKI_RP Vec3 tonemapACESFilm(ANKI_RP Vec3 x)
 {
-	const F32 a = 2.51;
-	const F32 b = 0.03;
-	const F32 c = 2.43;
-	const F32 d = 0.59;
-	const F32 e = 0.14;
+	const ANKI_RP F32 a = 2.51;
+	const ANKI_RP F32 b = 0.03;
+	const ANKI_RP F32 c = 2.43;
+	const ANKI_RP F32 d = 0.59;
+	const ANKI_RP F32 e = 0.14;
 
 	return saturate((x * (a * x + b)) / (x * (c * x + d) + e));
 }
 
-Vec3 tonemap(Vec3 color, F32 exposure)
+ANKI_RP Vec3 tonemap(ANKI_RP Vec3 color, ANKI_RP F32 exposure)
 {
 	color *= exposure;
 #if 0
-	const F32 saturation = 1.0;
+	const ANKI_RP F32 saturation = 1.0;
 	return tonemapReinhard(color, saturation);
 #else
 	return tonemapACESFilm(color);
 #endif
 }
 
-Vec3 tonemap(Vec3 color, F32 avgLum, F32 threshold)
+ANKI_RP Vec3 tonemap(ANKI_RP Vec3 color, ANKI_RP F32 avgLum, ANKI_RP F32 threshold)
 {
-	const F32 exposure = computeExposure(avgLum, threshold);
+	const ANKI_RP F32 exposure = computeExposure(avgLum, threshold);
 	return tonemap(color, exposure);
 }
 
 // https://graphicrants.blogspot.com/2013/12/tone-mapping.html
-Vec3 invertibleTonemap(Vec3 colour)
+ANKI_RP Vec3 invertibleTonemap(ANKI_RP Vec3 colour)
 {
 	// 1 / (1 + max(rgb))
 	return colour / (1.0 + max(max(colour.r, colour.g), colour.b));
 }
 
-Vec3 invertInvertibleTonemap(Vec3 colour)
+ANKI_RP Vec3 invertInvertibleTonemap(ANKI_RP Vec3 colour)
 {
 	// 1 / (1 - max(rgb))
 	return colour / (1.0 - max(max(colour.r, colour.g), colour.b));

+ 5 - 3
AnKi/Shaders/TraditionalDeferredShading.ankiprog

@@ -93,14 +93,16 @@ void main()
 
 	// Decode and process gbuffer
 	GbufferInfo gbuffer;
-	readGBuffer(u_msRt0, u_msRt1, u_msRt2, u_msSampler, uvToRead, 0.0, gbuffer);
+	unpackGBufferNoVelocity(textureLod(u_msRt0, u_msSampler, uvToRead, 0.0),
+							textureLod(u_msRt1, u_msSampler, uvToRead, 0.0),
+							textureLod(u_msRt2, u_msSampler, uvToRead, 0.0), gbuffer);
 	gbuffer.m_subsurface = max(gbuffer.m_subsurface, SUBSURFACE_MIN * 8.0);
 
 	const Vec4 worldPos4 = u_unis.m_invViewProjMat * Vec4(UV_TO_NDC(uvToWrite), depth, 1.0);
 	const Vec3 worldPos = worldPos4.xyz / worldPos4.w;
 
 	// Compute diff
-	const Vec3 diffC = diffuseLambert(gbuffer.m_diffuse);
+	const Vec3 diffC = diffuseLobe(gbuffer.m_diffuse);
 
 	// Compute spec
 	const Vec3 viewDir = normalize(u_unis.m_camPos - worldPos);
@@ -113,7 +115,7 @@ void main()
 #endif
 
 #if SPECULAR == 1
-	const Vec3 specC = computeSpecularColorBrdf(gbuffer, viewDir, l);
+	const Vec3 specC = specularIsotropicLobe(gbuffer, viewDir, l);
 #else
 	const Vec3 specC = Vec3(0.0);
 #endif

+ 26 - 25
AnKi/Shaders/VolumetricFogAccumulation.ankiprog

@@ -16,17 +16,17 @@ const UVec2 WORKGROUP_SIZE = UVec2(8, 8);
 layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
 
 layout(set = 0, binding = 0) uniform sampler u_linearAnyClampSampler;
-layout(set = 0, binding = 1) uniform texture3D u_lightVolume;
-layout(set = 0, binding = 2) writeonly uniform image3D u_fogVolume;
+layout(set = 0, binding = 1) uniform ANKI_RP texture3D u_lightVolume;
+layout(set = 0, binding = 2) writeonly uniform ANKI_RP image3D u_fogVolume;
 
-layout(push_constant, std430) uniform pc_
+layout(push_constant, std430) uniform b_pc
 {
-	F32 u_fogScatteringCoeff;
-	F32 u_fogAbsorptionCoeff;
-	F32 u_density;
-	F32 u_near;
-	Vec3 u_fogDiffuse;
-	F32 u_far;
+	ANKI_RP F32 u_fogScatteringCoeff;
+	ANKI_RP F32 u_fogAbsorptionCoeff;
+	ANKI_RP F32 u_density;
+	ANKI_RP F32 u_near;
+	ANKI_RP Vec3 u_fogDiffuse;
+	ANKI_RP F32 u_far;
 };
 
 void main()
@@ -38,42 +38,43 @@ void main()
 
 	const Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(VOLUME_SIZE.xy);
 
-	Vec4 colorAndDensityFront = Vec4(0.0);
+	ANKI_RP Vec4 colorAndDensityFront = Vec4(0.0);
 	ANKI_LOOP for(U32 i = 0u; i < VOLUME_SIZE.z; ++i)
 	{
-		const F32 fi = F32(i);
+		const ANKI_RP F32 fi = F32(i);
 
 		// Compute the linear depth
-		const F32 maxLinearDepth = F32(FINAL_Z_SPLIT + 1u) / F32(Z_SPLIT_COUNT);
-		const F32 linearDepthFraction = maxLinearDepth / F32(VOLUME_SIZE.z);
-		const F32 linearDepthNear = fi * linearDepthFraction;
-		const F32 linearDepthFar = (fi + 1.0) * linearDepthFraction;
+		const ANKI_RP F32 maxLinearDepth = F32(FINAL_Z_SPLIT + 1u) / F32(Z_SPLIT_COUNT);
+		const ANKI_RP F32 linearDepthFraction = maxLinearDepth / F32(VOLUME_SIZE.z);
+		const ANKI_RP F32 linearDepthNear = fi * linearDepthFraction;
+		const ANKI_RP F32 linearDepthFar = (fi + 1.0) * linearDepthFraction;
 
 		// Compute the min and max Z in view space if this cluster fragment
-		const F32 zVSpaceNear = -linearDepthNear * (u_far - u_near) + u_near;
-		const F32 zVSpaceFar = -linearDepthFar * (u_far - u_near) + u_near;
+		const ANKI_RP F32 zVSpaceNear = -linearDepthNear * (u_far - u_near) + u_near;
+		const ANKI_RP F32 zVSpaceFar = -linearDepthFar * (u_far - u_near) + u_near;
 
 		// Compute the thikness of this fragment
-		const F32 layerThinkness = abs(zVSpaceNear - zVSpaceFar);
+		const ANKI_RP F32 layerThinkness = abs(zVSpaceNear - zVSpaceFar);
 
 		// Read the light value and the fog density from the fog volumes
-		const F32 w = (fi + 0.5) / F32(VOLUME_SIZE.z);
-		Vec4 lightAndFogDensity = textureLod(u_lightVolume, u_linearAnyClampSampler, Vec3(uv, w), 0.0);
+		const ANKI_RP F32 w = (fi + 0.5) / F32(VOLUME_SIZE.z);
+		ANKI_RP Vec4 lightAndFogDensity = textureLod(u_lightVolume, u_linearAnyClampSampler, Vec3(uv, w), 0.0);
 		lightAndFogDensity.xyz *= u_fogDiffuse / PI;
 		lightAndFogDensity.w += u_density; // Apply the default density
 
 		// Scattering & absorption
-		const F32 scattering = lightAndFogDensity.w * u_fogScatteringCoeff * layerThinkness;
-		const F32 absorption = lightAndFogDensity.w * u_fogAbsorptionCoeff * layerThinkness;
+		const ANKI_RP F32 scattering = lightAndFogDensity.w * u_fogScatteringCoeff * layerThinkness;
+		const ANKI_RP F32 absorption = lightAndFogDensity.w * u_fogAbsorptionCoeff * layerThinkness;
 
 		// Integrate
-		const Vec4 colorAndDensityBack = Vec4(lightAndFogDensity.xyz * scattering, scattering + absorption);
+		const ANKI_RP Vec4 colorAndDensityBack = Vec4(lightAndFogDensity.xyz * scattering, scattering + absorption);
 
-		const Vec3 l = colorAndDensityFront.rgb + saturate(exp(-colorAndDensityFront.a)) * colorAndDensityBack.rgb;
+		const ANKI_RP Vec3 l =
+			colorAndDensityFront.rgb + saturate(exp(-colorAndDensityFront.a)) * colorAndDensityBack.rgb;
 		colorAndDensityFront = Vec4(l.rgb, colorAndDensityFront.a + colorAndDensityBack.a);
 
 		// Write the value
-		const Vec4 valToWrite = Vec4(colorAndDensityFront.rgb, saturate(exp(-colorAndDensityFront.a)));
+		const ANKI_RP Vec4 valToWrite = Vec4(colorAndDensityFront.rgb, saturate(exp(-colorAndDensityFront.a)));
 		imageStore(u_fogVolume, IVec3(UVec3(gl_GlobalInvocationID.xy, i)), valToWrite);
 	}
 }

+ 1 - 1
Tools/Android/app/build.gradle

@@ -1,7 +1,7 @@
 apply plugin: 'com.android.application'
 
 android {
-    compileSdkVersion 30
+    compileSdkVersion 31
     ndkVersion "23.0.7599858"
     defaultConfig {
         applicationId "org.anki.%TARGET%"

+ 1 - 1
Tools/Android/build.gradle

@@ -6,7 +6,7 @@ buildscript {
         jcenter()
     }
     dependencies {
-        classpath 'com.android.tools.build:gradle:7.0.0'
+        classpath 'com.android.tools.build:gradle:4.2.0'
 
         // NOTE: Do not place your application dependencies here; they belong
         // in the individual module build.gradle files