Browse Source

Add checkerboard rendering to improve perf on SSR

Panagiotis Christopoulos Charitos 7 years ago
parent
commit
30fbde7fdc
4 changed files with 73 additions and 18 deletions
  1. 0 3
      shaders/LightShading.glslp
  2. 59 9
      shaders/Ssr.glslp
  3. 13 5
      src/anki/renderer/Ssr.cpp
  4. 1 1
      src/anki/renderer/Ssr.h

+ 0 - 3
shaders/LightShading.glslp

@@ -207,9 +207,6 @@ void main()
 		Vec3 env = envBRDF(gbuffer.m_specular, gbuffer.m_roughness, u_integrationLut, NoV);
 		Vec3 env = envBRDF(gbuffer.m_specular, gbuffer.m_roughness, u_integrationLut, NoV);
 
 
 		out_color += indirectColor * gbuffer.m_diffuse + finalRefl * env;
 		out_color += indirectColor * gbuffer.m_diffuse + finalRefl * env;
-
-		// out_color += indirectColor * gbuffer.m_diffuse + ssr.rgb;
-		// out_color = ssr.rgb;
 	}
 	}
 
 
 #if 0
 #if 0

+ 59 - 9
shaders/Ssr.glslp

@@ -3,6 +3,14 @@
 // Code licensed under the BSD License.
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 // http://www.anki3d.org/LICENSE
 
 
+// If VARIANT==0 the checkerboard pattern is the following ('v' marks the pixels that get raymarched, the top row has an odd y):
+// -----
+// |v| |
+// | |v|
+// -----
+
+#pragma anki mutator VARIANT 0 1
+
 #pragma anki input const UVec2 FB_SIZE
 #pragma anki input const UVec2 FB_SIZE
 #pragma anki input const UVec2 WORKGROUP_SIZE
 #pragma anki input const UVec2 WORKGROUP_SIZE
 #pragma anki input const U32 MAX_STEPS
 #pragma anki input const U32 MAX_STEPS
@@ -32,6 +40,9 @@ layout(ANKI_UBO_BINDING(0, 0), row_major) uniform u_
 #define u_invProjMat u_unis.m_invProjMat
 #define u_invProjMat u_unis.m_invProjMat
 #define u_normalMat u_unis.m_normalMat
 #define u_normalMat u_unis.m_normalMat
 
 
+// Temp buffer to hold the indirect color
+shared Vec4 s_pixels[WORKGROUP_SIZE.y][WORKGROUP_SIZE.x];
+
 // Note: All calculations in view space
 // Note: All calculations in view space
 // It returns the UV coordinates of the reflection (xy) and the contribution factor (z)
 // It returns the UV coordinates of the reflection (xy) and the contribution factor (z)
 Vec3 raymarch(Vec3 r, Vec3 n, Vec3 viewPos, Vec2 uv, F32 depth)
 Vec3 raymarch(Vec3 r, Vec3 n, Vec3 viewPos, Vec2 uv, F32 depth)
@@ -125,12 +136,22 @@ Vec3 raymarch(Vec3 r, Vec3 n, Vec3 viewPos, Vec2 uv, F32 depth)
 
 
 void main()
 void main()
 {
 {
-	if(gl_GlobalInvocationID.x >= FB_SIZE.x || gl_GlobalInvocationID.y >= FB_SIZE.y)
+	// Compute a global invocation ID that takes the checkerboard pattern into account
+	IVec2 fixedInvocationId = IVec2(gl_GlobalInvocationID.xy);
+	fixedInvocationId.x *= 2;
+#if VARIANT == 0
+	fixedInvocationId.x += ((fixedInvocationId.y + 1) & 1);
+#else
+	fixedInvocationId.x += ((fixedInvocationId.y + 0) & 1);
+#endif
+
+	if(fixedInvocationId.x >= I32(FB_SIZE.x) || fixedInvocationId.y >= I32(FB_SIZE.y))
 	{
 	{
+		// Skip threads outside the writable image
 		return;
 		return;
 	}
 	}
 
 
-	Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
+	Vec2 uv = (Vec2(fixedInvocationId.xy) + 0.5) / Vec2(FB_SIZE);
 
 
 	// Read part of the G-buffer
 	// Read part of the G-buffer
 	F32 roughness = readRoughnessFromGBuffer(u_gbufferRt1, uv);
 	F32 roughness = readRoughnessFromGBuffer(u_gbufferRt1, uv);
@@ -154,20 +175,49 @@ void main()
 	F32 factor = ssr.z;
 	F32 factor = ssr.z;
 
 
 	// Read the reflection
 	// Read the reflection
-	Vec3 reflColor;
-	if(factor > 0.0)
+	Vec4 outColor;
+	ANKI_BRANCH if(factor > 0.0)
 	{
 	{
 		// Read the refl
 		// Read the refl
 		F32 lod = F32(LIGHT_BUFFER_MIP_COUNT - 1u) * roughness;
 		F32 lod = F32(LIGHT_BUFFER_MIP_COUNT - 1u) * roughness;
-		reflColor = textureLod(u_lightBufferRt, reflUv, lod).rgb;
-		reflColor = clamp(reflColor, 0.0, FLT_MAX); // Fix the value just in case
+		outColor.rgb = textureLod(u_lightBufferRt, reflUv, lod).rgb;
+		outColor.rgb = clamp(outColor.rgb, 0.0, FLT_MAX); // Fix the value just in case
+		outColor.rgb *= factor;
+		outColor.a = 1.0 - factor;
 	}
 	}
 	else
 	else
 	{
 	{
-		reflColor = Vec3(0.0);
+		outColor = Vec4(0.0, 0.0, 0.0, 1.0);
 	}
 	}
 
 
-	// Store to the image
-	imageStore(out_img, ivec2(gl_GlobalInvocationID.xy), Vec4(reflColor * factor, factor));
+	// Store the color for the resolve
+	s_pixels[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = outColor;
+
+	// Wait for all the threads to store their stuff
+	memoryBarrierShared();
+	barrier();
+
+	// Compute the missing pixel by resolving with the right or left neighbour
+	IVec2 readPixel, storePixel;
+	readPixel.y = I32(gl_LocalInvocationID.y);
+	storePixel.y = fixedInvocationId.y;
+
+#if VARIANT == 0
+	Bool pickRightNeighbour = (fixedInvocationId.y & 1) == 1;
+#else
+	Bool pickRightNeighbour = (fixedInvocationId.y & 1) == 0;
+#endif
+	I32 xOffset = (pickRightNeighbour) ? 1 : -1;
+
+	readPixel.x = I32(gl_LocalInvocationID.x) + xOffset;
+	readPixel.x = clamp(readPixel.x, 0, I32(WORKGROUP_SIZE.x - 1));
+
+	storePixel.x = fixedInvocationId.x + xOffset;
+
+	Vec4 missingColor = (outColor + s_pixels[readPixel.y][readPixel.x]) * 0.5; // average
+
+	// Store both the pixels
+	imageStore(out_img, fixedInvocationId, outColor);
+	imageStore(out_img, storePixel, missingColor);
 }
 }
 #pragma anki end
 #pragma anki end

+ 13 - 5
src/anki/renderer/Ssr.cpp

@@ -48,9 +48,16 @@ Error Ssr::initInternal(const ConfigSet& cfg)
 	consts.add("MAX_STEPS", U32(64));
 	consts.add("MAX_STEPS", U32(64));
 	consts.add("LIGHT_BUFFER_MIP_COUNT", U32(m_r->getDownscaleBlur().getMipmapCount()));
 	consts.add("LIGHT_BUFFER_MIP_COUNT", U32(m_r->getDownscaleBlur().getMipmapCount()));
 
 
+	ShaderProgramResourceMutationInitList<1> mutators(m_prog);
+	mutators.add("VARIANT", 0);
+
 	const ShaderProgramResourceVariant* variant;
 	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(consts.get(), variant);
-	m_grProg = variant->getProgram();
+	m_prog->getOrCreateVariant(mutators.get(), consts.get(), variant);
+	m_grProg[0] = variant->getProgram();
+
+	mutators[0].m_value = 1;
+	m_prog->getOrCreateVariant(mutators.get(), consts.get(), variant);
+	m_grProg[1] = variant->getProgram();
 
 
 	return Error::NONE;
 	return Error::NONE;
 }
 }
@@ -81,7 +88,7 @@ void Ssr::run(RenderPassWorkContext& rgraphCtx)
 {
 {
 	RenderingContext& ctx = *m_runCtx.m_ctx;
 	RenderingContext& ctx = *m_runCtx.m_ctx;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
-	cmdb->bindShaderProgram(m_grProg);
+	cmdb->bindShaderProgram(m_grProg[m_r->getFrameCount() & 1u]);
 
 
 	// Bind textures
 	// Bind textures
 	rgraphCtx.bindColorTextureAndSampler(0, 0, m_r->getGBuffer().getColorRt(1), m_r->getLinearSampler());
 	rgraphCtx.bindColorTextureAndSampler(0, 0, m_r->getGBuffer().getColorRt(1), m_r->getLinearSampler());
@@ -104,8 +111,9 @@ void Ssr::run(RenderPassWorkContext& rgraphCtx)
 	unis->m_normalMat = Mat3x4(ctx.m_renderQueue->m_viewMatrix.getRotationPart());
 	unis->m_normalMat = Mat3x4(ctx.m_renderQueue->m_viewMatrix.getRotationPart());
 
 
 	// Dispatch
 	// Dispatch
-	dispatchPPCompute(
-		cmdb, m_workgroupSize[0], m_workgroupSize[1], m_r->getWidth() / SSR_FRACTION, m_r->getHeight() / SSR_FRACTION);
+	const U sizeX = (m_r->getWidth() / SSR_FRACTION + m_workgroupSize[0] - 1) / m_workgroupSize[0];
+	const U sizeY = (m_r->getHeight() / SSR_FRACTION + m_workgroupSize[1] - 1) / m_workgroupSize[1];
+	cmdb->dispatchCompute(sizeX / 2, sizeY, 1);
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 1 - 1
src/anki/renderer/Ssr.h

@@ -36,7 +36,7 @@ anki_internal:
 
 
 private:
 private:
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramResourcePtr m_prog;
-	ShaderProgramPtr m_grProg;
+	Array<ShaderProgramPtr, 2> m_grProg;
 
 
 	RenderTargetDescription m_rtDescr;
 	RenderTargetDescription m_rtDescr;