Bladeren bron

- DLSS integration

Sergio Alapont 3 jaren geleden
bovenliggende
commit
c82668b77b

+ 8 - 16
AnKi/Gr/DLSSCtx.h

@@ -5,8 +5,8 @@
 
 #pragma once
 
-#include <AnKi/Math.h>
 #include <AnKi/Gr/GrObject.h>
+#include <AnKi/Math.h>
 #include <AnKi/Util/WeakArray.h>
 
 namespace anki {
@@ -24,11 +24,11 @@ enum class DLSSQualityMode : U8
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(DLSSQualityMode)
 
-class DLSSCtxInitInfo 
+class DLSSCtxInitInfo
 {
 public:
-	UVec2 m_srcRes = {0,0};
-	UVec2 m_dstRes = { 0,0 };
+	UVec2 m_srcRes = {0, 0};
+	UVec2 m_dstRes = {0, 0};
 	DLSSQualityMode m_mode = DLSSQualityMode::PERFORMANCE;
 };
 
@@ -39,8 +39,11 @@ class DLSSCtx : public GrObject
 public:
 	static constexpr GrObjectType CLASS_TYPE = GrObjectType::DLSS_CTX;
 
-protected:
+	void upscale(CommandBufferPtr cmdb, const TextureViewPtr& srcRt, const TextureViewPtr& dstRt,
+				 const TextureViewPtr& mvRt, const TextureViewPtr& depthRt, const TextureViewPtr& exposure,
+				 const Bool resetAccumulation, const Vec2& jitterOffset, const Vec2& mVScale);
 
+protected:
 	/// Construct.
 	DLSSCtx(GrManager* manager, CString name)
 		: GrObject(manager, CLASS_TYPE, name)
@@ -52,17 +55,6 @@ protected:
 	{
 	}
 
-	void upscale(CommandBufferPtr cmdb,
-		const TexturePtr srcRt,
-		const TexturePtr dstRt,
-		const TexturePtr mvRt,
-		const TexturePtr depthRt,
-		const TexturePtr exposure,
-		const Bool resetAccumulation,
-		const F32 sharpness,
-		const Vec2& jitterOffset,
-		const Vec2& mVScale);
-
 private:
 	/// Allocate and initialize a new instance.
 	static ANKI_USE_RESULT DLSSCtx* newInstance(GrManager* manager, const DLSSCtxInitInfo& init);

+ 4 - 11
AnKi/Gr/Vulkan/DLSSCtx.cpp

@@ -26,19 +26,12 @@ DLSSCtx* DLSSCtx::newInstance(GrManager* manager, const DLSSCtxInitInfo& init)
 	return impl;
 }
 
-void DLSSCtx::upscale(CommandBufferPtr cmdb,
-	const TexturePtr srcRt,
-	const TexturePtr dstRt,
-	const TexturePtr mvRt,
-	const TexturePtr depthRt,
-	const TexturePtr exposure,
-	const Bool resetAccumulation,
-	const F32 sharpness,
-	const Vec2& jitterOffset,
-	const Vec2& mVScale) 
+void DLSSCtx::upscale(CommandBufferPtr cmdb, const TextureViewPtr& srcRt, const TextureViewPtr& dstRt,
+					  const TextureViewPtr& mvRt, const TextureViewPtr& depthRt, const TextureViewPtr& exposure,
+					  const Bool resetAccumulation, const Vec2& jitterOffset, const Vec2& mVScale)
 {
 	ANKI_VK_SELF(DLSSCtxImpl);
-	self.upscale(cmdb, srcRt, dstRt, mvRt, depthRt, exposure, resetAccumulation, sharpness, jitterOffset, mVScale);
+	self.upscale(cmdb, srcRt, dstRt, mvRt, depthRt, exposure, resetAccumulation, jitterOffset, mVScale);
 }
 
 } // end namespace anki

+ 58 - 11
AnKi/Gr/Vulkan/DLSSCtxImpl.cpp

@@ -139,16 +139,21 @@ Error DLSSCtxImpl::init(const DLSSCtxInitInfo& init)
 	return Error::NONE;
 }
 
-void DLSSCtxImpl::upscale(CommandBufferPtr cmdb,
-    const TexturePtr srcRt,
-    const TexturePtr dstRt,
-    const TexturePtr mvRt,
-    const TexturePtr depthRt,
-    const TexturePtr exposure,
-    const Bool resetAccumulation,
-    const F32 sharpness,
-    const Vec2& jitterOffset,
-    const Vec2& mVScale)
+/// Describe an AnKi texture view as an NGX Vulkan resource so it can be handed to DLSS.
+/// @param tex   The view to expose. Its image view handle and subresource range are forwarded together with the
+///              image handle, format, width and height of the texture behind the view.
+/// @param isUAV Passed through as the last argument of NVSDK_NGX_Create_ImageView_Resource_VK(). The caller in
+///              this file sets it for the DLSS output and the exposure texture.
+/// @return The resource descriptor built by NVSDK_NGX_Create_ImageView_Resource_VK().
+static NVSDK_NGX_Resource_VK getNGXResourceFromAnkiTexture(const TextureViewImpl& tex, Bool isUAV)
+{
+	// Everything but the view handle and the subresource range comes from the texture behind the view
+	const auto& texImpl = tex.getTextureImpl();
+
+	const VkImageView imageView = tex.getHandle();
+	const VkImage image = texImpl.m_imageHandle;
+	const VkFormat format = convertFormat(texImpl.getFormat());
+	const VkImageSubresourceRange subresourceRange = tex.getVkImageSubresourceRange();
+
+	return NVSDK_NGX_Create_ImageView_Resource_VK(imageView, image, subresourceRange, format, texImpl.getWidth(),
+												  texImpl.getHeight(), isUAV);
+}
+
+void DLSSCtxImpl::upscale(CommandBufferPtr cmdb, const TextureViewPtr& srcRt, const TextureViewPtr& dstRt,
+						  const TextureViewPtr& mvRt, const TextureViewPtr& depthRt, const TextureViewPtr& exposure,
+						  const Bool resetAccumulation, const Vec2& jitterOffset, const Vec2& mVScale)
 {
     if (!isNgxInitialized())
     {
@@ -156,7 +161,50 @@ void DLSSCtxImpl::upscale(CommandBufferPtr cmdb,
         return;
     }
 
+    const TextureViewImpl& srcViewImpl = static_cast<const TextureViewImpl&>(*srcRt);
+	const TextureViewImpl& dstViewImpl = static_cast<const TextureViewImpl&>(*dstRt);
+	const TextureViewImpl& mvViewImpl = static_cast<const TextureViewImpl&>(*mvRt);
+	const TextureViewImpl& depthViewImpl = static_cast<const TextureViewImpl&>(*depthRt);
+	const TextureViewImpl& exposureViewImpl = static_cast<const TextureViewImpl&>(*exposure);
+
+    NVSDK_NGX_Resource_VK srcResVk = getNGXResourceFromAnkiTexture(srcViewImpl, false);
+	NVSDK_NGX_Resource_VK dstResVk = getNGXResourceFromAnkiTexture(dstViewImpl, true);
+	NVSDK_NGX_Resource_VK mvResVk = getNGXResourceFromAnkiTexture(mvViewImpl, false);
+	NVSDK_NGX_Resource_VK depthResVk = getNGXResourceFromAnkiTexture(depthViewImpl, false);
+	NVSDK_NGX_Resource_VK exposureResVk = getNGXResourceFromAnkiTexture(exposureViewImpl, true);
+
+    NVSDK_NGX_Coordinates renderingOffset = {0, 0};
+	NVSDK_NGX_Dimensions renderingSize = {srcViewImpl.getTextureImpl().getWidth(), srcViewImpl.getTextureImpl().getHeight()}; 
+
+    NVSDK_NGX_VK_DLSS_Eval_Params vkDlssEvalParams;
+	memset(&vkDlssEvalParams, 0, sizeof(vkDlssEvalParams));
+	vkDlssEvalParams.Feature.pInColor = &srcResVk;
+	vkDlssEvalParams.Feature.pInOutput = &dstResVk;
+	vkDlssEvalParams.pInDepth = &depthResVk;
+	vkDlssEvalParams.pInMotionVectors = &mvResVk;
+	vkDlssEvalParams.pInExposureTexture = &exposureResVk;
+	vkDlssEvalParams.InJitterOffsetX = jitterOffset.x();
+	vkDlssEvalParams.InJitterOffsetY = jitterOffset.y();
+	vkDlssEvalParams.Feature.InSharpness = m_recommendedSettings.m_recommendedSharpness;
+	vkDlssEvalParams.InReset = resetAccumulation;
+	vkDlssEvalParams.InMVScaleX = mVScale.x();
+	vkDlssEvalParams.InMVScaleY = mVScale.y();
+	vkDlssEvalParams.InColorSubrectBase = renderingOffset;
+	vkDlssEvalParams.InDepthSubrectBase = renderingOffset;
+	vkDlssEvalParams.InTranslucencySubrectBase = renderingOffset;
+	vkDlssEvalParams.InMVSubrectBase = renderingOffset;
+	vkDlssEvalParams.InRenderSubrectDimensions = renderingSize;
+
+    CommandBufferImpl& cmdbImpl = static_cast<CommandBufferImpl&>(*cmdb);
+
+    getGrManagerImpl().beginMarker(cmdbImpl.getHandle(), "DLSS");
+	NVSDK_NGX_Result result = NGX_VULKAN_EVALUATE_DLSS_EXT(cmdbImpl.getHandle(), m_dlssFeature, m_ngxParameters, &vkDlssEvalParams);
+	getGrManagerImpl().endMarker(cmdbImpl.getHandle());
 
+    if(NVSDK_NGX_FAILED(result))
+	{
+		ANKI_LOGE("Failed to NVSDK_NGX_VULKAN_EvaluateFeature for DLSS, code = 0x%08x, info: %ls", result, GetNGXResultAsString(result));
+	}
 }
 
 void DLSSCtxImpl::shutdown() 
@@ -168,7 +216,6 @@ void DLSSCtxImpl::shutdown()
 
         if (m_dlssFeature != nullptr)
         {
-            ANKI_VK_LOGW("Attempt to release NGX library before features have been released!  Releasing now but should check your code.");
             releaseDLSSFeature();
         }
         NVSDK_NGX_VULKAN_DestroyParameters(m_ngxParameters);

+ 3 - 10
AnKi/Gr/Vulkan/DLSSCtxImpl.h

@@ -45,16 +45,9 @@ public:
 
 	Bool isNgxInitialized() const { return m_ngxInitialized; }
 
-	void upscale(CommandBufferPtr cmdb,
-		const TexturePtr srcRt,
-		const TexturePtr dstRt,
-		const TexturePtr mvRt,
-		const TexturePtr depthRt,
-		const TexturePtr exposure,
-		const Bool resetAccumulation,
-		const F32 sharpness,
-		const Vec2& jitterOffset,
-		const Vec2& mVScale);
+	void upscale(CommandBufferPtr cmdb, const TextureViewPtr& srcRt, const TextureViewPtr& dstRt,
+				 const TextureViewPtr& mvRt, const TextureViewPtr& depthRt, const TextureViewPtr& exposure,
+				 const Bool resetAccumulation, const Vec2& jitterOffset, const Vec2& mVScale);
 
 private:
 

+ 7 - 3
AnKi/Renderer/DownscaleBlur.cpp

@@ -6,6 +6,7 @@
 #include <AnKi/Renderer/DownscaleBlur.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/TemporalAA.h>
+#include <AnKi/Renderer/LightShading.h>
 #include <AnKi/Core/ConfigSet.h>
 
 namespace anki {
@@ -88,6 +89,9 @@ void DownscaleBlur::populateRenderGraph(RenderingContext& ctx)
 	// Create passes
 	static const Array<CString, 8> passNames = {"DownBlur #0",  "Down/Blur #1", "Down/Blur #2", "Down/Blur #3",
 												"Down/Blur #4", "Down/Blur #5", "Down/Blur #6", "Down/Blur #7"};
+
+	RenderTargetHandle srcTarget = m_r->getUsingDLSS() ? m_r->getLightShading().getRt() : m_r->getTemporalAA().getHdrRt();
+
 	if(getConfig().getRPreferCompute())
 	{
 		for(U32 i = 0; i < m_passCount; ++i)
@@ -113,7 +117,7 @@ void DownscaleBlur::populateRenderGraph(RenderingContext& ctx)
 				TextureSubresourceInfo renderSubresource;
 
 				pass.newDependency({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
-				pass.newDependency({m_r->getTemporalAA().getHdrRt(), TextureUsageBit::SAMPLED_COMPUTE});
+				pass.newDependency({srcTarget, TextureUsageBit::SAMPLED_COMPUTE});
 			}
 		}
 	}
@@ -143,7 +147,7 @@ void DownscaleBlur::populateRenderGraph(RenderingContext& ctx)
 				TextureSubresourceInfo renderSubresource;
 
 				pass.newDependency({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
-				pass.newDependency({m_r->getTemporalAA().getHdrRt(), TextureUsageBit::SAMPLED_FRAGMENT});
+				pass.newDependency({srcTarget, TextureUsageBit::SAMPLED_FRAGMENT});
 			}
 		}
 	}
@@ -168,7 +172,7 @@ void DownscaleBlur::run(U32 passIdx, RenderPassWorkContext& rgraphCtx)
 	}
 	else
 	{
-		rgraphCtx.bindColorTexture(0, 1, m_r->getTemporalAA().getHdrRt());
+		rgraphCtx.bindColorTexture(0, 1, m_r->getUsingDLSS() ? m_r->getLightShading().getRt() : m_r->getTemporalAA().getHdrRt());
 	}
 
 	if(getConfig().getRPreferCompute())

+ 12 - 1
AnKi/Renderer/FinalComposite.cpp

@@ -48,6 +48,7 @@ Error FinalComposite::initInternal()
 	variantInitInfo.addConstant("LUT_SIZE", U32(LUT_SIZE));
 	variantInitInfo.addConstant("FB_SIZE", m_r->getPostProcessResolution());
 	variantInitInfo.addConstant("MOTION_BLUR_SAMPLES", getConfig().getRMotionBlurSamples());
+	variantInitInfo.addMutation("APPLY_TONEMAPPING", m_r->getUsingDLSS() ? 1 : 0);
 
 	for(U32 dbg = 0; dbg < 2; ++dbg)
 	{
@@ -113,6 +114,11 @@ void FinalComposite::populateRenderGraph(RenderingContext& ctx)
 		RenderPassDependency(m_r->getMotionVectors().getMotionVectorsRt(), TextureUsageBit::SAMPLED_FRAGMENT));
 	pass.newDependency(RenderPassDependency(m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_FRAGMENT));
 
+	if(m_r->getUsingDLSS())
+	{
+		pass.newDependency(RenderPassDependency(m_r->getTonemapping().getAverageLuminanceBuffer(), BufferUsageBit::UNIFORM_FRAGMENT));
+	}
+
 	RenderTargetHandle dbgRt;
 	Bool dbgRtValid;
 	ShaderProgramPtr debugProgram;
@@ -162,9 +168,14 @@ void FinalComposite::run(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx
 		rgraphCtx.bindTexture(0, 8, m_r->getGBuffer().getDepthRt(),
 							  TextureSubresourceInfo(DepthStencilAspectBit::DEPTH));
 
+		if(m_r->getUsingDLSS())
+		{
+			rgraphCtx.bindUniformBuffer(0, 9, m_r->getTonemapping().getAverageLuminanceBuffer());
+		}
+
 		if(dbgEnabled)
 		{
-			rgraphCtx.bindColorTexture(0, 9, m_r->getDbg().getRt());
+			rgraphCtx.bindColorTexture(0, 10, m_r->getDbg().getRt());
 		}
 
 		const UVec4 frameCount(m_r->getFrameCount() & MAX_U32);

+ 57 - 3
AnKi/Renderer/Renderer.cpp

@@ -60,6 +60,12 @@ Renderer::~Renderer()
 	m_currentDebugRtName.destroy(getAllocator());
 }
 
+/// Tell whether DLSS is in use.
+/// @return True only if the post-process and internal resolutions differ, the RDlss config value is non-zero and
+///         the device capabilities report DLSS support.
+Bool Renderer::getUsingDLSS() const
+{
+	const Bool resolutionsDiffer = getPostProcessResolution() != getInternalResolution();
+	if(!resolutionsDiffer)
+	{
+		// Same resolution, nothing to upscale
+		return false;
+	}
+
+	return (getConfig().getRDlss() != 0) && m_gr->getDeviceCapabilities().m_dlssSupport;
+}
+
 Error Renderer::init(ThreadHive* hive, ResourceManager* resources, GrManager* gl, StagingGpuMemoryPool* stagingMem,
 					 UiManager* ui, HeapAllocator<U8> alloc, ConfigSet* config, Timestamp* globTimestamp,
 					 UVec2 swapchainSize)
@@ -244,7 +250,7 @@ Error Renderer::initInternal(UVec2 swapchainResolution)
 		sinit.m_anisotropyLevel = m_config->getRTextureAnisotropy();
 		m_samplers.m_trilinearRepeatAniso = m_gr->newSampler(sinit);
 
-		const F32 scalingMipBias = log2(F32(m_internalResolution.x()) / F32(m_postProcessResolution.x()));
+		const F32 scalingMipBias = log2(F32(m_internalResolution.x()) / F32(m_postProcessResolution.x())) - (getUsingDLSS() ? 1 : 0);
 		sinit.m_lodBias = scalingMipBias;
 		m_samplers.m_trilinearRepeatAnisoResolutionScalingBias = m_gr->newSampler(sinit);
 	}
@@ -293,6 +299,51 @@ void Renderer::initJitteredMats()
 	}
 }
 
+/// Choose the sub-pixel jitter matrix of the current frame.
+/// When DLSS is not in use one of the precomputed m_jitteredMats8x entries is selected with m_frameCount. When
+/// DLSS is in use the matrix is an identity whose translation comes from a Halton sequence (base 2 on X, base 3
+/// on Y) indexed by m_frameCount + 1 and scaled by the internal resolution.
+/// @param[out] jitterMatrix Overwritten with the jitter matrix.
+void Renderer::updateJitterMatrix(Mat4& jitterMatrix)
+{
+	if(!getUsingDLSS())
+	{
+		// Masking instead of modulo: presumably the array size is a power of two -- TODO confirm
+		jitterMatrix = m_jitteredMats8x[m_frameCount & (m_jitteredMats8x.getSize() - 1)];
+		return;
+	}
+
+	// One element of the Halton sequence: the digits of "index" in "base", mirrored around the radix point
+	const auto halton = [](I64 index, const I64 base) -> F32 {
+		const F32 invBase = 1.0f / F32(base);
+		F32 fraction = invBase;
+		F32 result = 0.0f;
+		while(index > 0)
+		{
+			result += F32(index % base) * fraction;
+			index /= base;
+			fraction *= invBase;
+		}
+		return result;
+	};
+
+	// Halton jitter. The "+ 1" skips index 0, for which the loop above yields 0 on both axes
+	const I64 sequenceIdx = I64(m_frameCount + 1);
+	Vec2 offset(halton(sequenceIdx, 2), halton(sequenceIdx, 3));
+	offset = (offset - Vec2(0.5f)) * 2.0f;
+
+	Vec2 texSize(1.0f / Vec2(F32(m_internalResolution.x()), F32(m_internalResolution.y()))); // Texel size
+	texSize *= 2.0f;
+	Vec2 subSample = offset * texSize; // In [-texSize, texSize]
+	subSample *= 0.5f; // In [-texSize / 2, texSize / 2]
+
+	jitterMatrix = Mat4::getIdentity();
+	jitterMatrix.setTranslationPart(Vec4(subSample, 0.0, 1.0));
+}
+
 Error Renderer::populateRenderGraph(RenderingContext& ctx)
 {
 	ctx.m_prevMatrices = m_prevMatrices;
@@ -302,7 +353,7 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	ctx.m_matrices.m_projection = ctx.m_renderQueue->m_projectionMatrix;
 	ctx.m_matrices.m_viewProjection = ctx.m_renderQueue->m_viewProjectionMatrix;
 
-	ctx.m_matrices.m_jitter = m_jitteredMats8x[m_frameCount & (m_jitteredMats8x.getSize() - 1)];
+	updateJitterMatrix(ctx.m_matrices.m_jitter);
 	ctx.m_matrices.m_projectionJitter = ctx.m_matrices.m_jitter * ctx.m_matrices.m_projection;
 	ctx.m_matrices.m_viewProjectionJitter = ctx.m_matrices.m_projectionJitter * ctx.m_matrices.m_view;
 	ctx.m_matrices.m_invertedViewProjectionJitter = ctx.m_matrices.m_viewProjectionJitter.getInverse();
@@ -362,7 +413,10 @@ Error Renderer::populateRenderGraph(RenderingContext& ctx)
 	m_indirectSpecular->populateRenderGraph(ctx);
 	m_indirectDiffuse->populateRenderGraph(ctx);
 	m_lightShading->populateRenderGraph(ctx);
-	m_temporalAA->populateRenderGraph(ctx);
+	if(!getUsingDLSS())
+	{
+		m_temporalAA->populateRenderGraph(ctx);
+	}
 	m_vrsSriGeneration->populateRenderGraph(ctx);
 	m_scale->populateRenderGraph(ctx);
 	m_downscaleBlur->populateRenderGraph(ctx);

+ 3 - 0
AnKi/Renderer/Renderer.h

@@ -56,6 +56,8 @@ public:
 		return m_rtShadows.isCreated();
 	}
 
+	Bool getUsingDLSS() const;
+
 	const UVec2& getInternalResolution() const
 	{
 		return m_internalResolution;
@@ -283,6 +285,7 @@ private:
 	ANKI_USE_RESULT Error initInternal(UVec2 swapchainSize);
 
 	void initJitteredMats();
+	void updateJitterMatrix(Mat4& jitterMatrix);
 }; // namespace anki
 /// @}
 

+ 73 - 27
AnKi/Renderer/Scale.cpp

@@ -8,6 +8,11 @@
 #include <AnKi/Renderer/TemporalAA.h>
 #include <AnKi/Core/ConfigSet.h>
 
+#include <AnKi/Renderer/LightShading.h>
+#include <AnKi/Renderer/MotionVectors.h>
+#include <AnKi/Renderer/GBuffer.h>
+#include <AnKi/Renderer/Tonemapping.h>
+
 #if ANKI_COMPILER_GCC_COMPATIBLE
 #	pragma GCC diagnostic push
 #	pragma GCC diagnostic ignored "-Wunused-function"
@@ -39,10 +44,9 @@ Error Scale::init()
 
 	const Bool preferCompute = getConfig().getRPreferCompute();
 	const U32 fsrQuality = getConfig().getRFsr();
-	const U32 dlssQuality = getConfig().getRDlss();
 	// Dlss and FSR are mutually exclusive
-	m_dlss = (dlssQuality != 0) && getGrManager().getDeviceCapabilities().m_dlssSupport;
-	m_fsr = (fsrQuality != 0) && !m_dlss;
+	Bool useDlss = m_r->getUsingDLSS();
+	m_fsr = (fsrQuality != 0) && !useDlss;
 
 	// Program
 	if(needsScaling)
@@ -60,12 +64,12 @@ Error Scale::init()
 		{
 			shaderFname = "ShaderBinaries/BlitCompute.ankiprogbin";
 		}
-		else if(!m_dlss)
+		else if(!useDlss)
 		{
 			shaderFname = "ShaderBinaries/BlitRaster.ankiprogbin";
 		}
 
-		if (m_dlss) 
+		if(useDlss) 
 		{
 			DLSSCtxInitInfo init{};
 			init.m_srcRes = m_r->getInternalResolution();
@@ -108,11 +112,13 @@ Error Scale::init()
 	}
 
 	// Descriptors
-	m_rtDesc = m_r->create2DRenderTargetDescription(
-		m_r->getPostProcessResolution().x(), m_r->getPostProcessResolution().y(),
-		(getGrManager().getDeviceCapabilities().m_unalignedBbpTextureFormats) ? Format::R8G8B8_UNORM
-																			  : Format::R8G8B8A8_UNORM,
-		"Scaled");
+	Format desiredScaledFormat =
+		useDlss ? m_r->getHdrFormat()
+				: ((getGrManager().getDeviceCapabilities().m_unalignedBbpTextureFormats) ? Format::R8G8B8_UNORM
+																						 : Format::R8G8B8A8_UNORM);
+	const char* rtName = (useDlss && needsScaling) ? "Scaled (DLSS)" : (m_fsr ? "Scaled (FSR)" : "Scaled");
+	m_rtDesc = m_r->create2DRenderTargetDescription(m_r->getPostProcessResolution().x(),
+													m_r->getPostProcessResolution().y(), desiredScaledFormat, rtName);
 	m_rtDesc.bake();
 
 	m_fbDescr.m_colorAttachmentCount = 1;
@@ -136,32 +142,48 @@ void Scale::populateRenderGraph(RenderingContext& ctx)
 	if(doScaling())
 	{
 		m_runCtx.m_scaledRt = rgraph.newRenderTarget(m_rtDesc);
-
-		if(preferCompute)
+		if(doDLSS())
 		{
-			ComputeRenderPassDescription& pass = ctx.m_renderGraphDescr.newComputeRenderPass("Scale");
-			pass.newDependency(
-				RenderPassDependency(m_r->getTemporalAA().getTonemappedRt(), TextureUsageBit::SAMPLED_COMPUTE));
+			ComputeRenderPassDescription& pass = ctx.m_renderGraphDescr.newComputeRenderPass("DLSS");
+			pass.newDependency(RenderPassDependency(m_r->getLightShading().getRt(), TextureUsageBit::SAMPLED_COMPUTE));
+			pass.newDependency(RenderPassDependency(m_r->getMotionVectors().getMotionVectorsRt(), TextureUsageBit::SAMPLED_COMPUTE));
+			pass.newDependency(RenderPassDependency(m_r->getTonemapping().getExposureRT(), TextureUsageBit::IMAGE_COMPUTE_READ));
+			pass.newDependency(RenderPassDependency(m_r->getGBuffer().getDepthRt(), TextureUsageBit::SAMPLED_COMPUTE, TextureSubresourceInfo(DepthStencilAspectBit::DEPTH)));
 			pass.newDependency(RenderPassDependency(m_runCtx.m_scaledRt, TextureUsageBit::IMAGE_COMPUTE_WRITE));
 
-			pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
-				runScaling(rgraphCtx);
+			pass.setWork([this, &ctx](RenderPassWorkContext& rgraphCtx) {
+				runDLSS(ctx, rgraphCtx);
 			});
 		}
 		else
-		{
-			GraphicsRenderPassDescription& pass = ctx.m_renderGraphDescr.newGraphicsRenderPass("Scale");
-			pass.setFramebufferInfo(m_fbDescr, {m_runCtx.m_scaledRt});
+		{		
+			if(preferCompute)
+			{
+				ComputeRenderPassDescription& pass = ctx.m_renderGraphDescr.newComputeRenderPass("Scale");
+				pass.newDependency(
+					RenderPassDependency(m_r->getTemporalAA().getTonemappedRt(), TextureUsageBit::SAMPLED_COMPUTE));
+				pass.newDependency(RenderPassDependency(m_runCtx.m_scaledRt, TextureUsageBit::IMAGE_COMPUTE_WRITE));
+
+				pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
+					runScaling(rgraphCtx);
+				});
+			}
+			else
+			{
+				GraphicsRenderPassDescription& pass = ctx.m_renderGraphDescr.newGraphicsRenderPass("Scale");
+				pass.setFramebufferInfo(m_fbDescr, {m_runCtx.m_scaledRt});
 
-			pass.newDependency(
-				RenderPassDependency(m_r->getTemporalAA().getTonemappedRt(), TextureUsageBit::SAMPLED_FRAGMENT));
-			pass.newDependency(
-				RenderPassDependency(m_runCtx.m_scaledRt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE));
+				pass.newDependency(
+					RenderPassDependency(m_r->getTemporalAA().getTonemappedRt(), TextureUsageBit::SAMPLED_FRAGMENT));
+				pass.newDependency(
+					RenderPassDependency(m_runCtx.m_scaledRt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE));
 
-			pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
-				runScaling(rgraphCtx);
-			});
+				pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
+					runScaling(rgraphCtx);
+				});
+			}
 		}
+
 	}
 
 	if(doSharpening())
@@ -303,4 +325,28 @@ void Scale::runSharpening(RenderPassWorkContext& rgraphCtx)
 	}
 }
 
+/// Work callback of the "DLSS" pass: upscales the light shading RT into the scaled RT through m_dlssCtx.
+/// The motion vectors, depth and exposure render targets are passed along as extra inputs.
+/// @param ctx       Supplies the jitter matrix of the frame (ctx.m_matrices.m_jitter).
+/// @param rgraphCtx Supplies the command buffer and resolves the render target handles to textures.
+void Scale::runDLSS(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx)
+{
+	const Vec2 internalRes = static_cast<Vec2>(m_r->getInternalResolution());
+
+	// TODO: Expose this better
+	const Bool resetAccumulation = m_r->getFrameCount() == 0;
+
+	// UV space to Pixel space factor
+	const Vec2 mvScale = internalRes;
+
+	// In [-texSize / 2, texSize / 2] -> sub-pixel space {-0.5, 0.5}
+	const Vec2 jitterOffset = ctx.m_matrices.m_jitter.getTranslationPart().xy() * internalRes * 0.5f;
+
+	// Resolve the render graph handles to the textures of this frame
+	const TexturePtr srcTex(rgraphCtx.getTargetTexture(m_r->getLightShading().getRt()));
+	const TexturePtr dstTex(rgraphCtx.getTargetTexture(m_runCtx.m_scaledRt));
+	const TexturePtr mvTex(rgraphCtx.getTargetTexture(m_r->getMotionVectors().getMotionVectorsRt()));
+	const TexturePtr depthTex(rgraphCtx.getTargetTexture(m_r->getGBuffer().getDepthRt()));
+	const TexturePtr exposureTex(rgraphCtx.getTargetTexture(m_r->getTonemapping().getExposureRT()));
+
+	// DLSSCtx::upscale() works with texture views, create one per texture
+	const TextureViewPtr srcView = getGrManager().newTextureView(TextureViewInitInfo(srcTex, "DLSS_Src"));
+	const TextureViewPtr dstView = getGrManager().newTextureView(TextureViewInitInfo(dstTex, "DLSS_Dst"));
+	const TextureViewPtr mvView = getGrManager().newTextureView(TextureViewInitInfo(mvTex, "DLSS_MV"));
+	const TextureViewPtr depthView = getGrManager().newTextureView(TextureViewInitInfo(depthTex, "DLSS_Depth"));
+	const TextureViewPtr exposureView =
+		getGrManager().newTextureView(TextureViewInitInfo(exposureTex, "DLSS_Exposure"));
+
+	m_dlssCtx->upscale(rgraphCtx.m_commandBuffer, srcView, dstView, mvView, depthView, exposureView,
+					   resetAccumulation, jitterOffset, mvScale);
+}
+
 } // end namespace anki

+ 7 - 2
AnKi/Renderer/Scale.h

@@ -44,7 +44,6 @@ private:
 	RenderTargetDescription m_rtDesc;
 
 	Bool m_fsr = false;
-	Bool m_dlss = false;
 
 	class
 	{
@@ -55,15 +54,21 @@ private:
 
 	void runScaling(RenderPassWorkContext& rgraphCtx);
 	void runSharpening(RenderPassWorkContext& rgraphCtx);
+	void runDLSS(RenderingContext& ctx, RenderPassWorkContext& rgraphCtx);
 
 	Bool doSharpening() const
 	{
 		return m_sharpenProg.isCreated();
 	}
 
+	/// @return True if the DLSS context (m_dlssCtx) has been created.
+	Bool doDLSS() const
+	{
+		return m_dlssCtx.isCreated();
+	}
+
 	Bool doScaling() const
 	{
-		return m_scaleProg.isCreated();
+		return m_scaleProg.isCreated() || doDLSS();
 	}
 };
 /// @}

+ 14 - 0
AnKi/Renderer/Tonemapping.cpp

@@ -43,6 +43,11 @@ Error Tonemapping::initInternal()
 		sizeof(Vec4), BufferUsageBit::ALL_STORAGE | BufferUsageBit::ALL_UNIFORM | BufferUsageBit::TRANSFER_DESTINATION,
 		BufferMapAccessBit::NONE, "AvgLum"));
 
+	// Create exposure texture
+	TextureUsageBit usage = TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::SAMPLED_COMPUTE | TextureUsageBit::IMAGE_COMPUTE_WRITE;
+	TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(1, 1, Format::R32_SFLOAT, usage, "Exposure 1x1");
+	m_exposure1x1 = m_r->createAndClearRenderTarget(texinit, TextureUsageBit::TRANSFER_DESTINATION);
+
 	CommandBufferInitInfo cmdbinit;
 	cmdbinit.m_flags = CommandBufferFlag::SMALL_BATCH | CommandBufferFlag::GENERAL_WORK;
 	CommandBufferPtr cmdb = getGrManager().newCommandBuffer(cmdbinit);
@@ -53,6 +58,12 @@ Error Tonemapping::initInternal()
 
 	*static_cast<Vec4*>(data) = Vec4(0.5);
 	cmdb->copyBufferToBuffer(handle.getBuffer(), handle.getOffset(), m_luminanceBuff, 0, handle.getRange());
+	
+	// TODO: Review
+	TextureSubresourceInfo subresource;
+	subresource = TextureSubresourceInfo(TextureSurfaceInfo(0, 0, 0, 0));
+	TextureViewPtr tmpView = getGrManager().newTextureView(TextureViewInitInfo(m_exposure1x1, subresource, "ExposureTmpView"));
+	cmdb->copyBufferToTextureView(handle.getBuffer(), handle.getOffset(), sizeof(F32), tmpView);
 
 	FencePtr fence;
 	cmdb->flush({}, &fence);
@@ -68,6 +79,8 @@ void Tonemapping::importRenderTargets(RenderingContext& ctx)
 	// read/write. To skip the barrier import it as read/write as well.
 	m_runCtx.m_buffHandle = ctx.m_renderGraphDescr.importBuffer(
 		m_luminanceBuff, BufferUsageBit::STORAGE_COMPUTE_READ | BufferUsageBit::STORAGE_COMPUTE_WRITE);
+	m_runCtx.m_exposureHandle = ctx.m_renderGraphDescr.importRenderTarget(m_exposure1x1, 
+		TextureUsageBit::IMAGE_COMPUTE_READ | TextureUsageBit::IMAGE_COMPUTE_WRITE);
 }
 
 void Tonemapping::populateRenderGraph(RenderingContext& ctx)
@@ -82,6 +95,7 @@ void Tonemapping::populateRenderGraph(RenderingContext& ctx)
 
 		cmdb->bindShaderProgram(m_grProg);
 		rgraphCtx.bindStorageBuffer(0, 1, m_runCtx.m_buffHandle);
+		rgraphCtx.bindImage(0, 2, m_runCtx.m_exposureHandle);
 
 		TextureSubresourceInfo inputTexSubresource;
 		inputTexSubresource.m_firstMipmap = m_inputTexMip;

+ 7 - 0
AnKi/Renderer/Tonemapping.h

@@ -33,17 +33,24 @@ public:
 		return m_runCtx.m_buffHandle;
 	}
 
+	/// @return The render graph handle stored in m_runCtx.m_exposureHandle, i.e. the handle under which
+	///         importRenderTargets() imported the 1x1 exposure texture.
+	RenderTargetHandle getExposureRT() const
+	{
+		return m_runCtx.m_exposureHandle;
+	}
+
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramPtr m_grProg;
 	U32 m_inputTexMip;
 
 	BufferPtr m_luminanceBuff;
+	TexturePtr m_exposure1x1;
 
 	class
 	{
 	public:
 		BufferHandle m_buffHandle;
+		RenderTargetHandle m_exposureHandle;
 	} m_runCtx;
 
 	ANKI_USE_RESULT Error initInternal();

+ 15 - 2
AnKi/Renderer/VrsSriGeneration.cpp

@@ -6,6 +6,8 @@
 #include <AnKi/Renderer/VrsSriGeneration.h>
 #include <AnKi/Renderer/Renderer.h>
 #include <AnKi/Renderer/TemporalAA.h>
+#include <AnKi/Renderer/LightShading.h>
+#include <AnKi/Renderer/Tonemapping.h>
 #include <AnKi/Core/ConfigSet.h>
 
 namespace anki {
@@ -76,6 +78,8 @@ Error VrsSriGeneration::initInternal()
 		variantInit.addMutation("SHARED_MEMORY", 1);
 	}
 
+	variantInit.addMutation("HDR_INPUT", m_r->getUsingDLSS() ? 1 : 0);
+
 	const ShaderProgramResourceVariant* variant;
 	m_prog->getOrCreateVariant(variantInit, variant);
 	m_grProg = variant->getProgram();
@@ -128,16 +132,25 @@ void VrsSriGeneration::populateRenderGraph(RenderingContext& ctx)
 	ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass("VRS SRI generation");
 
 	pass.newDependency(RenderPassDependency(m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE));
-	pass.newDependency(RenderPassDependency(m_r->getTemporalAA().getTonemappedRt(), TextureUsageBit::SAMPLED_COMPUTE));
+	Bool useTonemappedRT = !m_r->getUsingDLSS();
+	pass.newDependency(
+		RenderPassDependency(useTonemappedRT ? m_r->getTemporalAA().getTonemappedRt() : m_r->getLightShading().getRt(),
+							 TextureUsageBit::SAMPLED_COMPUTE));
 
 	pass.setWork([this](RenderPassWorkContext& rgraphCtx) {
 		CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
 		cmdb->bindShaderProgram(m_grProg);
 
-		rgraphCtx.bindColorTexture(0, 0, m_r->getTemporalAA().getTonemappedRt());
+		Bool useTonemappedRT = !m_r->getUsingDLSS();
+		rgraphCtx.bindColorTexture(0, 0, useTonemappedRT ? m_r->getTemporalAA().getTonemappedRt() : m_r->getLightShading().getRt());
+
 		cmdb->bindSampler(0, 1, m_r->getSamplers().m_nearestNearestClamp);
 		rgraphCtx.bindImage(0, 2, m_runCtx.m_rt);
+		if(m_r->getUsingDLSS())
+		{
+			rgraphCtx.bindUniformBuffer(0, 3, m_r->getTonemapping().getAverageLuminanceBuffer());
+		}
 		const Vec4 pc(1.0f / Vec2(m_r->getInternalResolution()), getConfig().getRVrsThreshold(), 0.0f);
 		cmdb->setPushConstants(&pc, sizeof(pc));
 

+ 13 - 0
AnKi/Shaders/FinalComposite.ankiprog

@@ -5,6 +5,7 @@
 
 #pragma anki mutator BLUE_NOISE 0 1
 #pragma anki mutator BLOOM_ENABLED 0 1
+#pragma anki mutator APPLY_TONEMAPPING 0 1
 #pragma anki mutator DBG_ENABLED 0 1
 
 ANKI_SPECIALIZATION_CONSTANT_U32(LUT_SIZE, 0u);
@@ -30,10 +31,18 @@ layout(set = 0, binding = 5) uniform ANKI_RP texture3D u_lut;
 layout(set = 0, binding = 6) uniform ANKI_RP texture2D u_blueNoise;
 layout(set = 0, binding = 7) uniform texture2D u_motionVectorsRt;
 layout(set = 0, binding = 8) uniform texture2D u_depthRt;
+#if APPLY_TONEMAPPING
+#include <AnKi/Shaders/TonemappingFunctions.glsl>
+const U32 TONEMAPPING_SET = 0u;
+const U32 TONEMAPPING_BINDING = 9u;
+#include <AnKi/Shaders/TonemappingResources.glsl>
+#endif
+
 #if DBG_ENABLED
-layout(set = 0, binding = 9) uniform ANKI_RP texture2D u_dbgOutlineRt;
+// Binding 9 is taken by the tonemapping resources, FinalComposite::run() binds the debug RT at 10
+layout(set = 0, binding = 10) uniform ANKI_RP texture2D u_dbgOutlineRt;
 #endif
 
+
 layout(push_constant, std140) uniform b_pc
 {
 	Vec3 u_padding0;
@@ -67,6 +76,10 @@ void main()
 		out_color = textureLod(u_lightShadingRt, u_linearAnyClampSampler, uv, 0.0).rgb;
 	}
 
+#if APPLY_TONEMAPPING
+	out_color = linearToSRgb(tonemap(out_color, u_exposureThreshold0));
+#endif
+
 #if BLOOM_ENABLED
 	const ANKI_RP Vec3 bloom = textureLod(u_ppsBloomLfRt, u_linearAnyClampSampler, uv, 0.0).rgb;
 	out_color += bloom;

+ 5 - 0
AnKi/Shaders/TonemappingAverageLuminance.ankiprog

@@ -24,6 +24,9 @@ layout(set = 0, binding = 0) uniform ANKI_RP texture2D u_tex;
 #define TONEMAPPING_BINDING 1
 #include <AnKi/Shaders/TonemappingResources.glsl>
 
+// Needed for DLSS
+layout(set = 0, binding = 2) uniform writeonly image2D u_outExposureImage;
+
 shared F32 s_avgLum[WORKGROUP_SIZE.x * WORKGROUP_SIZE.y];
 
 void main()
@@ -99,6 +102,8 @@ void main()
 
 		u_averageLuminance = finalAvgLum;
 		u_exposureThreshold0 = computeExposure(u_averageLuminance, 0.0);
+
+		imageStore(u_outExposureImage, IVec2(0,0), Vec4(u_exposureThreshold0));
 	}
 }
 #pragma anki end

+ 12 - 0
AnKi/Shaders/VrsSriGeneration.glsl

@@ -5,6 +5,7 @@
 
 #pragma anki mutator SRI_TEXEL_DIMENSION 8 16
 #pragma anki mutator SHARED_MEMORY 0 1
+#pragma anki mutator HDR_INPUT 0 1
 
 #include <AnKi/Shaders/Functions.glsl>
 #include <AnKi/Shaders/TonemappingFunctions.glsl>
@@ -12,6 +13,11 @@
 // Find the maximum luma derivative in x and y, relative to the average luma of the block.
 // Each thread handles a 2x2 region when using 8x8 VRS tiles and a 2x4 region when using 16x16 VRS tiles.
 
+#if HDR_INPUT
+const U32 TONEMAPPING_SET = 0u;
+const U32 TONEMAPPING_BINDING = 3u;
+#include <AnKi/Shaders/TonemappingResources.glsl>
+#endif
 layout(set = 0, binding = 0) uniform ANKI_RP texture2D u_inputTex;
 layout(set = 0, binding = 1) uniform sampler u_nearestClampSampler;
 
@@ -42,9 +48,15 @@ shared F32 s_averageLuma[SHARED_MEMORY_ENTRIES];
 shared Vec2 s_maxDerivative[SHARED_MEMORY_ENTRIES];
 #endif
 
+#if HDR_INPUT
+#define sampleLuma(offsetX, offsetY) \
+	computeLuminance( \
+		linearToSRgb(tonemap(textureLodOffset(sampler2D(u_inputTex, u_nearestClampSampler), uv, 0.0, IVec2(offsetX, offsetY)).xyz, u_exposureThreshold0)))
+#else
 #define sampleLuma(offsetX, offsetY) \
 	computeLuminance( \
 		textureLodOffset(sampler2D(u_inputTex, u_nearestClampSampler), uv, 0.0, IVec2(offsetX, offsetY)).xyz)
+#endif
 
 void main()
 {

+ 1 - 1
ThirdParty/nvngx_dlss_sdk/CMakeLists.txt

@@ -19,7 +19,7 @@ if (WIN32)
     )
 
     # TODO: DO NOT USE/PUSH THE DEV DLLs 
-    set_target_properties(AnkiNgx PROPERTIES IMPORTED_LOCATION "${NGX_SDK_ROOT}/lib/Windows_x86_64/dev/nvngx_dlss.dll")
+    set_target_properties(AnkiNgx PROPERTIES IMPORTED_LOCATION "${NGX_SDK_ROOT}/lib/Windows_x86_64/rel/nvngx_dlss.dll")
 else ()
     add_library(AnkiNgx IMPORTED STATIC GLOBAL)