Ver Fonte

Add 1/4 resolution support for RT irradiance

Panagiotis Christopoulos Charitos há 5 meses atrás
pai
commit
c6b0ebb966

+ 23 - 19
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -33,10 +33,10 @@ Error IndirectDiffuseClipmaps::init()
 {
 	ANKI_CHECK(RtMaterialFetchRendererObject::init());
 
-	m_halfRtDesc =
-		getRenderer().create2DRenderTargetDescription(getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y(),
-													  getRenderer().getHdrFormat(), "IndirectDiffuseClipmap: Half");
-	m_halfRtDesc.bake();
+	m_lowRezRtDesc = getRenderer().create2DRenderTargetDescription(
+		getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1),
+		getRenderer().getHdrFormat(), "IndirectDiffuseClipmap: Half");
+	m_lowRezRtDesc.bake();
 
 	m_fullRtDesc = getRenderer().create2DRenderTargetDescription(getRenderer().getInternalResolution().x(), getRenderer().getInternalResolution().y(),
 																 getRenderer().getHdrFormat(), "IndirectDiffuseClipmap: Full");
@@ -114,10 +114,11 @@ Error IndirectDiffuseClipmaps::init()
 		m_avgIrradianceVolumes[clipmap] = getRenderer().createAndClearRenderTarget(volumeInit, TextureUsageBit::kSrvCompute);
 	}
 
-	const Array<SubMutation, 4> mutation = {{{"GPU_WAVE_SIZE", MutatorValue(GrManager::getSingleton().getDeviceCapabilities().m_maxWaveSize)},
+	const Array<SubMutation, 5> mutation = {{{"GPU_WAVE_SIZE", MutatorValue(GrManager::getSingleton().getDeviceCapabilities().m_maxWaveSize)},
 											 {"RADIANCE_OCTAHEDRON_MAP_SIZE", MutatorValue(g_indirectDiffuseClipmapRadianceOctMapSize)},
 											 {"IRRADIANCE_OCTAHEDRON_MAP_SIZE", MutatorValue(g_indirectDiffuseClipmapIrradianceOctMapSize)},
-											 {"RT_MATERIAL_FETCH_CLIPMAP", 0}}};
+											 {"RT_MATERIAL_FETCH_CLIPMAP", 0},
+											 {"SPATIAL_RECONSTRUCT_TYPE", !g_indirectDiffuseClipmapApplyHighQuality}}};
 
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_applyGiGrProg, "Apply"));
 	ANKI_CHECK(loadShaderProgram("ShaderBinaries/IndirectDiffuseClipmaps.ankiprogbin", mutation, m_prog, m_visProbesGrProg, "VisualizeProbes"));
@@ -197,7 +198,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 	RenderGraphBuilder& rgraph = ctx.m_renderGraphDescr;
 
 	const RenderTargetHandle rtResultHandle = rgraph.newRenderTarget(m_rtResultRtDesc);
-	const RenderTargetHandle halfHandle = rgraph.newRenderTarget(m_halfRtDesc);
+	const RenderTargetHandle lowRezRt = rgraph.newRenderTarget(m_lowRezRtDesc);
 	const RenderTargetHandle fullHandle = rgraph.newRenderTarget(m_fullRtDesc);
 
 	Array<RenderTargetHandle, kIndirectDiffuseClipmapCount>& radianceVolumes = m_runCtx.m_handles.m_radianceVolumes;
@@ -422,9 +423,9 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			pass.newTextureDependency(distanceMomentsVolumes[clipmap], TextureUsageBit::kSrvTraceRays);
 		}
 
-		pass.newTextureDependency(halfHandle, TextureUsageBit::kUavTraceRays);
+		pass.newTextureDependency(lowRezRt, TextureUsageBit::kUavTraceRays);
 
-		pass.setWork([this, &ctx, sbtBuffer, halfHandle](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, &ctx, sbtBuffer, lowRezRt](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_rtLibraryGrProg.get());
@@ -478,14 +479,15 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSampler(1, 2, getRenderer().getSamplers().m_trilinearClampShadow.get());
 			cmdb.bindSampler(2, 2, getRenderer().getSamplers().m_trilinearRepeat.get());
 
-			rgraphCtx.bindUav(0, 2, halfHandle);
+			rgraphCtx.bindUav(0, 2, lowRezRt);
 			cmdb.bindUav(1, 2, TextureView(getDummyGpuResources().m_texture2DUav.get(), TextureSubresourceDesc::firstSurface()));
 
 			const Vec4 consts(g_indirectDiffuseClipmapFirstBounceRayDistance);
 			cmdb.setFastConstants(&consts, sizeof(consts));
 
 			cmdb.traceRays(sbtBuffer, m_sbtRecordSize, GpuSceneArrays::RenderableBoundingVolumeRt::getSingleton().getElementCount(), 1,
-						   getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y(), 1);
+						   getRenderer().getInternalResolution().x() / 2,
+						   getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1), 1);
 		});
 	}
 	else
@@ -501,9 +503,9 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			pass.newTextureDependency(distanceMomentsVolumes[i], TextureUsageBit::kSrvCompute);
 			pass.newTextureDependency(avgIrradianceVolumes[i], TextureUsageBit::kSrvCompute);
 		}
-		pass.newTextureDependency(halfHandle, TextureUsageBit::kUavCompute);
+		pass.newTextureDependency(lowRezRt, TextureUsageBit::kUavCompute);
 
-		pass.setWork([this, &ctx, halfHandle](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, &ctx, lowRezRt](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_applyGiGrProg.get());
@@ -512,13 +514,14 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			rgraphCtx.bindSrv(1, 0, getGBuffer().getColorRt(2));
 			cmdb.bindSrv(2, 0, TextureView(&m_blueNoiseImg->getTexture(), TextureSubresourceDesc::firstSurface()));
 
-			rgraphCtx.bindUav(0, 0, halfHandle);
+			rgraphCtx.bindUav(0, 0, lowRezRt);
 
 			cmdb.bindConstantBuffer(0, 0, ctx.m_globalRenderingConstantsBuffer);
 
 			cmdb.bindSampler(0, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
 
-			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y());
+			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2,
+							  getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1));
 		});
 	}
 
@@ -527,19 +530,20 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 		NonGraphicsRenderPass& pass = rgraph.newNonGraphicsRenderPass("IndirectDiffuseClipmaps: Spatial reconstruct");
 
 		pass.newTextureDependency(getGBuffer().getDepthRt(), TextureUsageBit::kSrvCompute);
-		pass.newTextureDependency(halfHandle, TextureUsageBit::kSrvCompute);
+		pass.newTextureDependency(lowRezRt, TextureUsageBit::kSrvCompute);
 		pass.newTextureDependency(fullHandle, TextureUsageBit::kUavCompute);
 
-		pass.setWork([this, halfHandle, fullHandle](RenderPassWorkContext& rgraphCtx) {
+		pass.setWork([this, lowRezRt, fullHandle](RenderPassWorkContext& rgraphCtx) {
 			CommandBuffer& cmdb = *rgraphCtx.m_commandBuffer;
 
 			cmdb.bindShaderProgram(m_spatialReconstructGrProg.get());
 
-			rgraphCtx.bindSrv(0, 0, halfHandle);
+			rgraphCtx.bindSrv(0, 0, lowRezRt);
 			rgraphCtx.bindSrv(1, 0, getGBuffer().getDepthRt());
 			rgraphCtx.bindUav(0, 0, fullHandle);
 
-			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2, getRenderer().getInternalResolution().y());
+			dispatchPPCompute(cmdb, 8, 8, getRenderer().getInternalResolution().x() / 2,
+							  getRenderer().getInternalResolution().y() / (!g_indirectDiffuseClipmapApplyHighQuality + 1));
 		});
 	}
 

+ 3 - 1
AnKi/Renderer/IndirectDiffuseClipmaps.h

@@ -59,6 +59,8 @@ inline NumericCVar<U32> g_indirectDiffuseClipmapIrradianceOctMapSize("R", "Indir
 
 inline NumericCVar<F32> g_indirectDiffuseClipmapFirstBounceRayDistance("R", "IndirectDiffuseClipmapFirstBounceRayDistance", 0.0f, 0.0f, 10000.0f,
 																	   "For the 1st bounce shoot rays instead of sampling the clipmaps");
+inline BoolCVar g_indirectDiffuseClipmapApplyHighQuality("R", "IndirectDiffuseClipmapApplyHighQuality", false,
+														 "If true use 1/2 resolution else use 1/4");
 
 /// @memberof IndirectDiffuseClipmaps
 class IndirectDiffuseClipmapsRenderTargetHandles
@@ -111,7 +113,7 @@ private:
 	Array<TexturePtr, kIndirectDiffuseClipmapCount> m_avgIrradianceVolumes;
 
 	RenderTargetDesc m_rtResultRtDesc;
-	RenderTargetDesc m_halfRtDesc;
+	RenderTargetDesc m_lowRezRtDesc;
 	RenderTargetDesc m_fullRtDesc;
 
 	IndirectDiffuseClipmapConstants m_consts;

+ 147 - 21
AnKi/Shaders/IndirectDiffuseClipmaps.ankiprog

@@ -9,12 +9,13 @@
 #pragma anki mutator RADIANCE_OCTAHEDRON_MAP_SIZE 8 10 12 14 16 18 20
 #pragma anki mutator IRRADIANCE_OCTAHEDRON_MAP_SIZE 4 5 6
 #pragma anki mutator RT_MATERIAL_FETCH_CLIPMAP 0 1
+#pragma anki mutator SPATIAL_RECONSTRUCT_TYPE 0 1
 
-#pragma anki technique RtMaterialFetch rgen mutators RT_MATERIAL_FETCH_CLIPMAP
+#pragma anki technique RtMaterialFetch rgen mutators RT_MATERIAL_FETCH_CLIPMAP SPATIAL_RECONSTRUCT_TYPE
 #pragma anki technique PopulateCaches comp mutators RADIANCE_OCTAHEDRON_MAP_SIZE
 #pragma anki technique ComputeIrradiance comp mutators GPU_WAVE_SIZE RADIANCE_OCTAHEDRON_MAP_SIZE IRRADIANCE_OCTAHEDRON_MAP_SIZE
-#pragma anki technique Apply comp mutators
-#pragma anki technique SpatialReconstruct comp mutators
+#pragma anki technique Apply comp mutators SPATIAL_RECONSTRUCT_TYPE
+#pragma anki technique SpatialReconstruct comp mutators SPATIAL_RECONSTRUCT_TYPE
 #pragma anki technique TemporalDenoise comp mutators
 #pragma anki technique VisualizeProbes vert pixel mutators
 
@@ -121,8 +122,13 @@ ANKI_FAST_CONSTANTS(Consts, g_consts)
 
 [Shader("raygeneration")] void main()
 {
+#		if SPATIAL_RECONSTRUCT_TYPE == 0
 	const UVec2 fullCoord = UVec2(DispatchRaysIndex().x * 2u + (DispatchRaysIndex().y & 1u), DispatchRaysIndex().y);
 	const Vec2 uv = (fullCoord + 0.5) / (DispatchRaysDimensions().xy * UVec2(2, 1));
+#		else
+	const UVec2 fullCoord = DispatchRaysIndex().xy * 2u;
+	const Vec2 uv = (fullCoord + 0.5) / (DispatchRaysDimensions().xy * 2);
+#		endif
 
 	const F32 depth = TEX(g_depthTex, fullCoord).x;
 	const Vec4 rt2 = TEX(g_gbufferRt2, fullCoord);
@@ -522,7 +528,11 @@ SamplerState g_linearAnyRepeatSampler : register(s0);
 	const Vec2 fullViewportSize = halfViewportSize * Vec2(2.0, 1.0);
 
 	const UVec2 realSvDispatchThreadId = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
+#	if SPATIAL_RECONSTRUCT_TYPE == 0
 	const Vec2 coord = Vec2(realSvDispatchThreadId.x * 2u + (realSvDispatchThreadId.y & 1u), realSvDispatchThreadId.y);
+#	else
+	const Vec2 coord = Vec2(realSvDispatchThreadId * 2u);
+#	endif
 
 	if(any(coord >= fullViewportSize))
 	{
@@ -574,15 +584,133 @@ Texture2D<F32> g_depthTex : register(t1);
 
 RWTexture2D<Vec4> g_outTex : register(u0);
 
-[numthreads(64, 1, 1)] void main(COMPUTE_ARGS)
+void appendSample(F32 refDepth, F32 sampleDepth, Vec3 sample, inout Vec3 sampleSum, inout F32 weightSum, F32 extraWeight = 1.0) // Accumulate one depth-weighted sample into the running sums
+{
+	const F32 weight = calculateBilateralWeightDepth<F32>(refDepth, sampleDepth, 1.0) * extraWeight; // Bilateral depth weight of the sample against refDepth, scaled by the caller-supplied extra factor
+	sampleSum += sample * weight;
+	weightSum += weight; // Tracked alongside the colour sum so the caller can normalize afterwards
+}
+
+void normalizeSum(F32 weightSum, inout Vec3 sampleSum) // Turn an accumulated weighted sum into a weighted average, in place
+{
+	if(weightSum > kEpsilonF32 * 10.0) // Only divide when the total weight is safely above zero
+	{
+		sampleSum /= weightSum;
+	}
+	else
+	{
+		sampleSum = 0.0; // Negligible total weight: output zero instead of dividing
+	}
+}
+
+void oneIn4Reconstruct(IVec2 svDispatchThreadId)
 {
 	IVec2 viewportSize;
 	g_outTex.GetDimensions(viewportSize.x, viewportSize.y);
+	const IVec2 quarterViewportSize = viewportSize / 2;
+	const IVec2 quarterCoord = svDispatchThreadId; // Coord in quarter rez
+
+	// This is the pattern we are trying to fill
+	// +---+---+
+	// | 0 | 1 |
+	// +---+---+
+	// | 3 | 2 |
+	// +---+---+
+
+	// Gather the color of the neighbours and their depth
+	Vec3 samples[2][2];
+	F32 sampleDepths[2][2];
+	F32 maxLuma = 0.0;
+	IVec2 maxLumaPixel = 0;
+	[unroll] for(U32 x = 0; x < 2; ++x)
+	{
+		[unroll] for(U32 y = 0; y < 2; ++y)
+		{
+			IVec2 coord = quarterCoord + IVec2(x, y);
+			coord = min(coord, quarterViewportSize - 1);
+			samples[x][y] = TEX(g_inTex, coord);
 
-	const IVec2 realSvDispatchThreadId = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy);
+			const F32 luma = computeLuminance(samples[x][y]);
+			if(luma > maxLuma)
+			{
+				maxLuma = luma;
+				maxLumaPixel = IVec2(x, y);
+			}
+
+			coord = quarterCoord + IVec2(x, y);
+			coord *= 2;
+			sampleDepths[x][y] = TEX(g_depthTex, coord);
+		}
+	}
 
-	const IVec2 filledCoord = IVec2(realSvDispatchThreadId.x * 2 + (realSvDispatchThreadId.y & 1), realSvDispatchThreadId.y);
-	const IVec2 toBeFilledCoord = IVec2(realSvDispatchThreadId.x * 2 + ((realSvDispatchThreadId.y + 1) & 1), realSvDispatchThreadId.y);
+	// Remove fireflies
+	F32 avgLumaOf3 = 0.0;
+	[unroll] for(U32 x = 0; x < 2; ++x)
+	{
+		[unroll] for(U32 y = 0; y < 2; ++y)
+		{
+			if(any(maxLumaPixel != IVec2(x, y)))
+			{
+				const F32 luma = computeLuminance(samples[x][y]);
+				avgLumaOf3 += luma / 3.0;
+			}
+		}
+	}
+
+	if(maxLuma > avgLumaOf3 * 5.0)
+	{
+		// Firefly, tone it down
+		samples[maxLumaPixel.x][maxLumaPixel.y] *= avgLumaOf3 / maxLuma;
+	}
+
+	// 0 is already filled, just write it
+	IVec2 coord = quarterCoord * 2;
+	TEX(g_outTex, coord) = Vec4(samples[0][0], 0.0);
+
+	// For 2 use 4 samples
+	coord = quarterCoord * 2 + 1;
+	F32 refDepth = TEX(g_depthTex, coord);
+	Vec3 sampleSum = 0.0;
+	F32 weightSum = 0.0;
+	appendSample(refDepth, sampleDepths[0][0], samples[0][0], sampleSum, weightSum);
+	appendSample(refDepth, sampleDepths[1][0], samples[1][0], sampleSum, weightSum);
+	appendSample(refDepth, sampleDepths[1][1], samples[1][1], sampleSum, weightSum);
+	appendSample(refDepth, sampleDepths[0][1], samples[0][1], sampleSum, weightSum);
+	normalizeSum(weightSum, sampleSum);
+	TEX(g_outTex, coord) = Vec4(sampleSum, 0.0);
+	const Vec3 sample2 = sampleSum;
+	const F32 depth2 = refDepth;
+
+	// For 1 use 3 samples
+	coord = quarterCoord * 2 + IVec2(1, 0);
+	refDepth = TEX(g_depthTex, coord);
+	sampleSum = 0.0;
+	weightSum = 0.0;
+	appendSample(refDepth, sampleDepths[0][0], samples[0][0], sampleSum, weightSum, 1.0);
+	appendSample(refDepth, sampleDepths[1][0], samples[1][0], sampleSum, weightSum, 1.0);
+	appendSample(refDepth, depth2, sample2, sampleSum, weightSum, 0.5); // Less weight on that since it's reconstructed
+	normalizeSum(weightSum, sampleSum);
+	TEX(g_outTex, coord) = Vec4(sampleSum, 0.0);
+
+	// For 3 use 3 samples
+	coord = quarterCoord * 2 + IVec2(0, 1);
+	refDepth = TEX(g_depthTex, coord);
+	sampleSum = 0.0;
+	weightSum = 0.0;
+	appendSample(refDepth, sampleDepths[0][0], samples[0][0], sampleSum, weightSum, 1.0);
+	appendSample(refDepth, sampleDepths[0][1], samples[0][1], sampleSum, weightSum, 1.0);
+	appendSample(refDepth, depth2, sample2, sampleSum, weightSum, 0.5); // Less weight on that since it's reconstructed
+	normalizeSum(weightSum, sampleSum);
+	TEX(g_outTex, coord) = Vec4(sampleSum, 0.0);
+}
+
+void checkerboardReconstruct(IVec2 svDispatchThreadId)
+{
+	IVec2 viewportSize;
+	g_outTex.GetDimensions(viewportSize.x, viewportSize.y);
+
+	const IVec2 filledCoord = IVec2(svDispatchThreadId.x * 2 + (svDispatchThreadId.y & 1), svDispatchThreadId.y);
+	const IVec2 toBeFilledCoord = IVec2(svDispatchThreadId.x * 2 + ((svDispatchThreadId.y + 1) & 1), svDispatchThreadId.y);
 
 	const F32 refDepth = TEX(g_depthTex, toBeFilledCoord);
 
@@ -595,13 +723,9 @@ RWTexture2D<Vec4> g_outTex : register(u0);
 		if(all(sampleCoord >= 0) && all(sampleCoord < viewportSize))
 		{
 			const F32 sampleDepth = TEX(g_depthTex, sampleCoord);
-
 			const Vec3 sample = TEX(g_inTex, IVec2(sampleCoord.x / 2, sampleCoord.y));
 
-			const F32 weight = calculateBilateralWeightDepth<F32>(refDepth, sampleDepth, 1.0);
-			weightSum += weight;
-
-			toBeFilledColor += weight * sample;
+			appendSample(refDepth, sampleDepth, sample, toBeFilledColor, weightSum);
 
 			if(all(sampleCoord == filledCoord))
 			{
@@ -610,17 +734,19 @@ RWTexture2D<Vec4> g_outTex : register(u0);
 		}
 	}
 
-	if(weightSum > kEpsilonF32 * 10.0)
-	{
-		toBeFilledColor /= weightSum;
-	}
-	else
-	{
-		toBeFilledColor = 0.0;
-	}
-
+	normalizeSum(weightSum, toBeFilledColor);
 	TEX(g_outTex, toBeFilledCoord) = Vec4(toBeFilledColor, 0.0);
 }
+
+[numthreads(64, 1, 1)] void main(COMPUTE_ARGS) // SpatialReconstruct entry point: the reconstruction variant is chosen at compile time by SPATIAL_RECONSTRUCT_TYPE
+{
+	const IVec2 realSvDispatchThreadId = getOptimalDispatchThreadId8x8Amd(svGroupIndex, svGroupId.xy); // 2D thread coord derived from the group index and group id
+#	if SPATIAL_RECONSTRUCT_TYPE == 0
+	checkerboardReconstruct(realSvDispatchThreadId); // Type 0: fill the missing pixel of each checkerboard pair
+#	else
+	oneIn4Reconstruct(realSvDispatchThreadId); // Type 1: fill each 2x2 block from one pre-filled pixel
+#	endif
+}
 #endif
 
 // ===========================================================================