Browse Source

Decrease bandwidth (BW) cost of SSAO

Panagiotis Christopoulos Charitos 7 years ago
parent
commit
0556ef1c5b
3 changed files with 74 additions and 104 deletions
  1. +39 -52
      programs/Ssao.ankiprog
  2. +30 -50
      src/anki/renderer/Ssao.cpp
  3. +5 -2
      src/anki/renderer/Ssao.h

+ 39 - 52
programs/Ssao.ankiprog

@@ -5,6 +5,10 @@ Code licensed under the BSD License.
 http://www.anki3d.org/LICENSE
 -->
 <shaderProgram>
+	<mutators>
+		<mutator name="USE_NORMAL" values="0 1"/>
+	</mutators>
+
 	<shaders>
 		<shader type="vert">
 			<source><![CDATA[
@@ -19,7 +23,7 @@ http://www.anki3d.org/LICENSE
 				<input name="RADIUS" type="float" const="1"/>
 				<input name="BIAS" type="float" const="1"/>
 				<input name="STRENGTH" type="float" const="1"/>
-				<input name="HISTORY_FEEDBACK" type="float" const="1"/>
+				<input name="SAMPLE_COUNT" type="uint" const="1"/>
 			</inputs>
 
 			<source><![CDATA[
@@ -35,18 +39,16 @@ layout(ANKI_UBO_BINDING(0, 0), std140, row_major) uniform _blk
 {
 	vec4 u_unprojectionParams;
 	vec4 u_projectionMat;
-	vec4 u_noiseLayerPad3;
-	mat4 u_prevViewProjMatMulInvViewProjMat;
 	mat3 u_viewRotMat;
 };
 
-#define u_noiseLayer u_noiseLayerPad3.x
-
 layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_mMsDepthRt;
-layout(ANKI_TEX_BINDING(0, 1)) uniform sampler2D u_msRt;
-layout(ANKI_TEX_BINDING(0, 2)) uniform sampler2DArray u_noiseMap;
-layout(ANKI_TEX_BINDING(0, 3)) uniform sampler2D u_prevSsaoRt;
+layout(ANKI_TEX_BINDING(0, 1)) uniform sampler2DArray u_noiseMap;
+#if USE_NORMAL
+layout(ANKI_TEX_BINDING(0, 2)) uniform sampler2D u_msRt;
+#endif
 
+#if USE_NORMAL
 // Get normal
 vec3 readNormal(in vec2 uv)
 {
@@ -55,19 +57,20 @@ vec3 readNormal(in vec2 uv)
 	normal = u_viewRotMat * normal;
 	return normal;
 }
+#endif
 
 // Read the noise tex
-vec3 readRandom(in vec2 uv)
+vec3 readRandom(vec2 uv, float layer)
 {
 	const vec2 tmp = vec2(float(FB_SIZE.x) / float(NOISE_MAP_SIZE), float(FB_SIZE.y) / float(NOISE_MAP_SIZE));
-	vec3 r = texture(u_noiseMap, vec3(tmp * uv, u_noiseLayer)).rgb;
+	vec3 r = texture(u_noiseMap, vec3(tmp * uv, layer)).rgb;
 	return r;
 }
 
 // Returns the Z of the position in view space
 float readZ(in vec2 uv)
 {
-	float depth = texture(u_mMsDepthRt, uv).r;
+	float depth = textureLod(u_mMsDepthRt, uv, 0.0).r;
 	float z = u_unprojectionParams.z / (u_unprojectionParams.w + depth);
 	return z;
 }
@@ -77,8 +80,7 @@ vec3 readPosition(in vec2 uv)
 {
 	vec3 fragPosVspace;
 	fragPosVspace.z = readZ(uv);
-
-	fragPosVspace.xy = (2.0 * uv - 1.0) * u_unprojectionParams.xy * fragPosVspace.z;
+	fragPosVspace.xy = UV_TO_NDC(uv) * u_unprojectionParams.xy * fragPosVspace.z;
 
 	return fragPosVspace;
 }
@@ -90,33 +92,16 @@ vec4 project(vec4 point)
 
 void main(void)
 {
-	vec2 ndc = in_uv * 2.0 - 1.0;
-	float depth = texture(u_mMsDepthRt, in_uv).r;
+	vec2 ndc = UV_TO_NDC(in_uv);
 
-	vec3 origin;
-	origin.z = u_unprojectionParams.z / (u_unprojectionParams.w + depth);
-	origin.xy = ndc * u_unprojectionParams.xy * origin.z;
+	// Compute origin
+	vec3 origin = readPosition(in_uv);
 
+	// Get normal
+#if USE_NORMAL
 	vec3 normal = readNormal(in_uv);
-
-	// Get rand factors
-	vec3 randFactors = readRandom(in_uv);
-
-	// Get prev SSAO
-	vec4 clip = u_prevViewProjMatMulInvViewProjMat * vec4(vec3(ndc, depth), 1.0);
-	clip.xy /= clip.w;
-	vec2 oldUv = NDC_TO_UV(clip.xy);
-	float prevSsao = textureLod(u_prevSsaoRt, oldUv, 0.0).r;
-
-	// Compute the history blend. If clip falls outside NDC then it's 1.0 (use only current SSAO term) and if it's
-	// inside NDC then use the HISTORY_FEEDBACK value
-#if 0
-	vec2 posNdc = abs(clip.xy);
-	float historyFeedback = max(posNdc.x, posNdc.y);
-	historyFeedback = min(floor(historyFeedback), 1.0 - HISTORY_FEEDBACK);
-	historyFeedback += HISTORY_FEEDBACK;
 #else
-	const float historyFeedback = HISTORY_FEEDBACK;
+	vec3 normal = normalize(cross(dFdx(origin), dFdy(origin)));
 #endif
 
 	// Find the projected radius
@@ -125,23 +110,25 @@ void main(void)
 	vec2 projSphereLimit2 = projSphereLimit.xy / projSphereLimit.w;
 	float projRadius = length(projSphereLimit2 - ndc);
 
-	// Find a random point around the current NDC. Make sure that the sides fall inside the screen.
-#if 0
-	vec2 startXY = -in_uv; // range [0,-1]
-	startXY += randFactors.xy; // for the left side it's [0,1] for the center [0,0], right [-1,0]
-	vec2 finalDiskPoint = ndc + startXY * projRadius;
-#else
-	vec2 finalDiskPoint = ndc + (randFactors.xy - 0.5) * projRadius;
-#endif
-
-	// Compute factor
-	vec3 s = readPosition(NDC_TO_UV(finalDiskPoint));
-	vec3 u = s - origin;
-	float ssao = max(dot(normal, u) + BIAS, 0.0) / max(dot(u, u), EPSILON);
+	// Loop to compute
+	float ssao = 0.0;
+	for(uint i = 0; i < SAMPLE_COUNT; ++i)
+	{
+		// Compute disk
+		vec3 randFactors = readRandom(in_uv, float(i));
+		vec2 dir = normalize(randFactors.xy * 2.0 - 1.0);
+		float radius = projRadius * (randFactors.z * 0.85 + 0.15);
+		vec2 finalDiskPoint = ndc + dir * radius;
+
+		// Compute factor
+		vec3 s = readPosition(NDC_TO_UV(finalDiskPoint));
+		vec3 u = s - origin;
+		ssao += max(dot(normal, u) + BIAS, EPSILON) / max(dot(u, u), EPSILON);
+	}
+
+	ssao *= (1.0 / float(SAMPLE_COUNT));
 	ssao = 1.0 - ssao * STRENGTH;
-
-	// Blend
-	out_color = mix(prevSsao, ssao, historyFeedback);
+	out_color = ssao;
 }
 			]]></source>
 		</shader>

+ 30 - 50
src/anki/renderer/Ssao.cpp

@@ -26,15 +26,18 @@ Error Ssao::initMain(const ConfigSet& config)
 	// Shader
 	ANKI_CHECK(getResourceManager().loadResource("programs/Ssao.ankiprog", m_main.m_prog));
 
+	ShaderProgramResourceMutationInitList<1> mutators(m_main.m_prog);
+	mutators.add("USE_NORMAL", 0u);
+
 	ShaderProgramResourceConstantValueInitList<6> consts(m_main.m_prog);
 	consts.add("NOISE_MAP_SIZE", U32(m_main.m_noiseTex->getWidth()))
 		.add("FB_SIZE", UVec2(m_width, m_height))
-		.add("RADIUS", 3.0f)
+		.add("RADIUS", 2.5f)
 		.add("BIAS", 0.0f)
-		.add("STRENGTH", 2.0f)
-		.add("HISTORY_FEEDBACK", 1.0f / 4.0f);
+		.add("STRENGTH", 2.5f)
+		.add("SAMPLE_COUNT", 4u);
 	const ShaderProgramResourceVariant* variant;
-	m_main.m_prog->getOrCreateVariant(consts.get(), variant);
+	m_main.m_prog->getOrCreateVariant(mutators.get(), consts.get(), variant);
 	m_main.m_grProg = variant->getProgram();
 
 	return Error::NONE;
@@ -83,19 +86,13 @@ Error Ssao::init(const ConfigSet& config)
 
 	ANKI_R_LOGI("Initializing SSAO. Size %ux%u", m_width, m_height);
 
-	static const Array<const char*, 2> RT_NAMES = {{"SsaoMain #1", "SsaoMain #2"}};
-	for(U i = 0; i < 2; ++i)
-	{
-		// RT
-		TextureInitInfo texinit = m_r->create2DRenderTargetInitInfo(m_width,
-			m_height,
-			Ssao::RT_PIXEL_FORMAT,
-			TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE | TextureUsageBit::CLEAR,
-			&RT_NAMES[i][0]);
-		texinit.m_initialUsage = TextureUsageBit::SAMPLED_FRAGMENT;
-
-		m_rtTextures[i] = m_r->createAndClearRenderTarget(texinit);
-	}
+	// RT
+	m_rtDescr = m_r->create2DRenderTargetDescription(m_width,
+		m_height,
+		Ssao::RT_PIXEL_FORMAT,
+		TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE | TextureUsageBit::CLEAR,
+		"SSAO");
+	m_rtDescr.bake();
 
 	// FB descr
 	m_fbDescr.m_colorAttachmentCount = 1;
@@ -131,20 +128,16 @@ void Ssao::runMain(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx
 
 	rgraphCtx.bindTextureAndSampler(
 		0, 0, m_r->getDepthDownscale().getHiZRt(), HIZ_QUARTER_DEPTH, m_r->getLinearSampler());
-	rgraphCtx.bindColorTextureAndSampler(0, 1, m_r->getGBuffer().getColorRt(2), m_r->getLinearSampler());
 	cmdb->bindTextureAndSampler(0,
-		2,
+		1,
 		m_main.m_noiseTex->getGrTextureView(),
 		m_r->getTrilinearRepeatSampler(),
 		TextureUsageBit::SAMPLED_FRAGMENT);
-	rgraphCtx.bindColorTextureAndSampler(0, 3, m_runCtx.m_rts[(m_r->getFrameCount() + 1) & 1], m_r->getLinearSampler());
 
 	struct Unis
 	{
 		Vec4 m_unprojectionParams;
 		Vec4 m_projectionMat;
-		Vec4 m_noiseLayerPad3;
-		Mat4 m_prevViewProjMatMulInvViewProjMat;
 		Mat3x4 m_viewRotMat;
 	};
 
@@ -152,9 +145,6 @@ void Ssao::runMain(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx
 	const Mat4& pmat = ctx.m_renderQueue->m_projectionMatrix;
 	unis->m_unprojectionParams = ctx.m_unprojParams;
 	unis->m_projectionMat = Vec4(pmat(0, 0), pmat(1, 1), pmat(2, 2), pmat(2, 3));
-	unis->m_noiseLayerPad3 = Vec4(m_r->getFrameCount() % m_main.m_noiseTex->getLayerCount(), 0.0, 0.0, 0.0);
-	unis->m_prevViewProjMatMulInvViewProjMat =
-		ctx.m_prevViewProjMat * ctx.m_renderQueue->m_viewProjectionMatrix.getInverse();
 	unis->m_viewRotMat = Mat3x4(ctx.m_renderQueue->m_viewMatrix.getRotationPart());
 
 	drawQuad(cmdb);
@@ -166,7 +156,7 @@ void Ssao::runHBlur(RenderPassWorkContext& rgraphCtx)
 
 	cmdb->setViewport(0, 0, m_width, m_height);
 	cmdb->bindShaderProgram(m_hblur.m_grProg);
-	rgraphCtx.bindColorTextureAndSampler(0, 0, m_runCtx.m_rts[m_r->getFrameCount() & 1], m_r->getLinearSampler());
+	rgraphCtx.bindColorTextureAndSampler(0, 0, m_runCtx.m_rts[0], m_r->getLinearSampler());
 	rgraphCtx.bindTextureAndSampler(
 		0, 1, m_r->getDepthDownscale().getHiZRt(), HIZ_QUARTER_DEPTH, m_r->getLinearSampler());
 	drawQuad(cmdb);
@@ -178,7 +168,7 @@ void Ssao::runVBlur(RenderPassWorkContext& rgraphCtx)
 
 	cmdb->setViewport(0, 0, m_width, m_height);
 	cmdb->bindShaderProgram(m_vblur.m_grProg);
-	rgraphCtx.bindColorTextureAndSampler(0, 0, m_runCtx.m_rts[(m_r->getFrameCount() + 1) & 1], m_r->getLinearSampler());
+	rgraphCtx.bindColorTextureAndSampler(0, 0, m_runCtx.m_rts[1], m_r->getLinearSampler());
 	rgraphCtx.bindTextureAndSampler(
 		0, 1, m_r->getDepthDownscale().getHiZRt(), HIZ_QUARTER_DEPTH, m_r->getLinearSampler());
 	drawQuad(cmdb);
@@ -190,25 +180,20 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 
 	// Create RTs
-	const U rtToRenderIdx = m_r->getFrameCount() & 1;
-	m_runCtx.m_rts[rtToRenderIdx] =
-		rgraph.importRenderTarget("SSAO #1", m_rtTextures[rtToRenderIdx], TextureUsageBit::NONE);
-	const U rtToReadIdx = !rtToRenderIdx;
-	m_runCtx.m_rts[rtToReadIdx] =
-		rgraph.importRenderTarget("SSAO #2", m_rtTextures[rtToReadIdx], TextureUsageBit::SAMPLED_FRAGMENT);
+	m_runCtx.m_rts[0] = rgraph.newRenderTarget(m_rtDescr);
+	m_runCtx.m_rts[1] = rgraph.newRenderTarget(m_rtDescr);
 
 	// Create main render pass
 	{
 		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO main");
 
 		pass.setWork(runMainCallback, this, 0);
-		pass.setFramebufferInfo(m_fbDescr, {{m_runCtx.m_rts[rtToRenderIdx]}}, {});
+		pass.setFramebufferInfo(m_fbDescr, {{m_runCtx.m_rts[0]}}, {});
 
 		pass.newConsumer({m_r->getGBuffer().getColorRt(2), TextureUsageBit::SAMPLED_FRAGMENT});
-		pass.newConsumer({m_runCtx.m_rts[rtToRenderIdx], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
-		pass.newConsumer({m_runCtx.m_rts[rtToReadIdx], TextureUsageBit::SAMPLED_FRAGMENT});
 		pass.newConsumer({m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_FRAGMENT, HIZ_QUARTER_DEPTH});
-		pass.newProducer({m_runCtx.m_rts[rtToRenderIdx], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
+		pass.newConsumer({m_runCtx.m_rts[0], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
+		pass.newProducer({m_runCtx.m_rts[0], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
 	}
 
 	// Create HBlur pass
@@ -216,12 +201,12 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO hblur");
 
 		pass.setWork(runHBlurCallback, this, 0);
-		pass.setFramebufferInfo(m_fbDescr, {{m_runCtx.m_rts[rtToReadIdx]}}, {});
+		pass.setFramebufferInfo(m_fbDescr, {{m_runCtx.m_rts[1]}}, {});
 
-		pass.newConsumer({m_runCtx.m_rts[rtToReadIdx], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
-		pass.newConsumer({m_runCtx.m_rts[rtToRenderIdx], TextureUsageBit::SAMPLED_FRAGMENT});
+		pass.newConsumer({m_runCtx.m_rts[1], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
+		pass.newConsumer({m_runCtx.m_rts[0], TextureUsageBit::SAMPLED_FRAGMENT});
 		pass.newConsumer({m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_FRAGMENT, HIZ_QUARTER_DEPTH});
-		pass.newProducer({m_runCtx.m_rts[rtToReadIdx], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
+		pass.newProducer({m_runCtx.m_rts[1], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
 	}
 
 	// Create VBlur pass
@@ -229,18 +214,13 @@ void Ssao::populateRenderGraph(RenderingContext& ctx)
 		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass("SSAO vblur");
 
 		pass.setWork(runVBlurCallback, this, 0);
-		pass.setFramebufferInfo(m_fbDescr, {{m_runCtx.m_rts[rtToRenderIdx]}}, {});
+		pass.setFramebufferInfo(m_fbDescr, {{m_runCtx.m_rts[0]}}, {});
 
-		pass.newConsumer({m_runCtx.m_rts[rtToRenderIdx], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
-		pass.newConsumer({m_runCtx.m_rts[rtToReadIdx], TextureUsageBit::SAMPLED_FRAGMENT});
+		pass.newConsumer({m_runCtx.m_rts[0], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
+		pass.newConsumer({m_runCtx.m_rts[1], TextureUsageBit::SAMPLED_FRAGMENT});
 		pass.newConsumer({m_r->getDepthDownscale().getHiZRt(), TextureUsageBit::SAMPLED_FRAGMENT, HIZ_QUARTER_DEPTH});
-		pass.newProducer({m_runCtx.m_rts[rtToRenderIdx], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
+		pass.newProducer({m_runCtx.m_rts[0], TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE});
 	}
 }
 
-RenderTargetHandle Ssao::getRt() const
-{
-	return m_runCtx.m_rts[m_r->getFrameCount() & 1];
-}
-
 } // end namespace anki

+ 5 - 2
src/anki/renderer/Ssao.h

@@ -33,7 +33,10 @@ anki_internal:
 	/// Populate the rendergraph.
 	void populateRenderGraph(RenderingContext& ctx);
 
-	RenderTargetHandle getRt() const;
+	RenderTargetHandle getRt() const
+	{
+		return m_runCtx.m_rts[0];
+	}
 
 private:
 	U32 m_width, m_height;
@@ -67,7 +70,7 @@ private:
 		const RenderingContext* m_ctx = nullptr;
 	} m_runCtx; ///< Runtime context.
 
-	Array<TexturePtr, 2> m_rtTextures;
+	RenderTargetDescription m_rtDescr;
 	FramebufferDescription m_fbDescr;
 
 	ANKI_USE_RESULT Error initMain(const ConfigSet& set);