Browse Source

Add checkerboard rendering to improve perf on SSR

Panagiotis Christopoulos Charitos 7 years ago
parent
commit
30fbde7fdc
4 changed files with 73 additions and 18 deletions
  1. 0 3
      shaders/LightShading.glslp
  2. 59 9
      shaders/Ssr.glslp
  3. 13 5
      src/anki/renderer/Ssr.cpp
  4. 1 1
      src/anki/renderer/Ssr.h

+ 0 - 3
shaders/LightShading.glslp

@@ -207,9 +207,6 @@ void main()
 		Vec3 env = envBRDF(gbuffer.m_specular, gbuffer.m_roughness, u_integrationLut, NoV);
 
 		out_color += indirectColor * gbuffer.m_diffuse + finalRefl * env;
-
-		// out_color += indirectColor * gbuffer.m_diffuse + ssr.rgb;
-		// out_color = ssr.rgb;
 	}
 
 #if 0

+ 59 - 9
shaders/Ssr.glslp

@@ -3,6 +3,14 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
+// If VARIANT==0 the checkerboard pattern is the following ('v' marks the pixels that are rendered; the others are resolved from their horizontal neighbours):
+// -----
+// |v| |
+// | |v|
+// -----
+
+#pragma anki mutator VARIANT 0 1
+
 #pragma anki input const UVec2 FB_SIZE
 #pragma anki input const UVec2 WORKGROUP_SIZE
 #pragma anki input const U32 MAX_STEPS
@@ -32,6 +40,9 @@ layout(ANKI_UBO_BINDING(0, 0), row_major) uniform u_
 #define u_invProjMat u_unis.m_invProjMat
 #define u_normalMat u_unis.m_normalMat
 
+// Temp buffer to hold the indirect color
+shared Vec4 s_pixels[WORKGROUP_SIZE.y][WORKGROUP_SIZE.x];
+
 // Note: All calculations in view space
 // It returns the UV coordinates of the reflection (xy) and the contribution factor (z)
 Vec3 raymarch(Vec3 r, Vec3 n, Vec3 viewPos, Vec2 uv, F32 depth)
@@ -125,12 +136,22 @@ Vec3 raymarch(Vec3 r, Vec3 n, Vec3 viewPos, Vec2 uv, F32 depth)
 
 void main()
 {
-	if(gl_GlobalInvocationID.x >= FB_SIZE.x || gl_GlobalInvocationID.y >= FB_SIZE.y)
+	// Compute a global invocation ID that takes the checkerboard pattern into account
+	IVec2 fixedInvocationId = IVec2(gl_GlobalInvocationID.xy);
+	fixedInvocationId.x *= 2;
+#if VARIANT == 0
+	fixedInvocationId.x += ((fixedInvocationId.y + 1) & 1);
+#else
+	fixedInvocationId.x += ((fixedInvocationId.y + 0) & 1);
+#endif
+
+	if(fixedInvocationId.x >= I32(FB_SIZE.x) || fixedInvocationId.y >= I32(FB_SIZE.y))
 	{
+		// Skip threads outside the writable image
 		return;
 	}
 
-	Vec2 uv = (Vec2(gl_GlobalInvocationID.xy) + 0.5) / Vec2(FB_SIZE);
+	Vec2 uv = (Vec2(fixedInvocationId.xy) + 0.5) / Vec2(FB_SIZE);
 
 	// Read part of the G-buffer
 	F32 roughness = readRoughnessFromGBuffer(u_gbufferRt1, uv);
@@ -154,20 +175,49 @@ void main()
 	F32 factor = ssr.z;
 
 	// Read the reflection
-	Vec3 reflColor;
-	if(factor > 0.0)
+	Vec4 outColor;
+	ANKI_BRANCH if(factor > 0.0)
 	{
 		// Read the refl
 		F32 lod = F32(LIGHT_BUFFER_MIP_COUNT - 1u) * roughness;
-		reflColor = textureLod(u_lightBufferRt, reflUv, lod).rgb;
-		reflColor = clamp(reflColor, 0.0, FLT_MAX); // Fix the value just in case
+		outColor.rgb = textureLod(u_lightBufferRt, reflUv, lod).rgb;
+		outColor.rgb = clamp(outColor.rgb, 0.0, FLT_MAX); // Fix the value just in case
+		outColor.rgb *= factor;
+		outColor.a = 1.0 - factor;
 	}
 	else
 	{
-		reflColor = Vec3(0.0);
+		outColor = Vec4(0.0, 0.0, 0.0, 1.0);
 	}
 
-	// Store to the image
-	imageStore(out_img, ivec2(gl_GlobalInvocationID.xy), Vec4(reflColor * factor, factor));
+	// Store the color for the resolve
+	s_pixels[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = outColor;
+
+	// Wait for all the threads to store their stuff
+	memoryBarrierShared();
+	barrier();
+
+	// Compute the missing pixel by resolving with the right or left neighbour
+	IVec2 readPixel, storePixel;
+	readPixel.y = I32(gl_LocalInvocationID.y);
+	storePixel.y = fixedInvocationId.y;
+
+#if VARIANT == 0
+	Bool pickRightNeighbour = (fixedInvocationId.y & 1) == 1;
+#else
+	Bool pickRightNeighbour = (fixedInvocationId.y & 1) == 0;
+#endif
+	I32 xOffset = (pickRightNeighbour) ? 1 : -1;
+
+	readPixel.x = I32(gl_LocalInvocationID.x) + xOffset;
+	readPixel.x = clamp(readPixel.x, 0, I32(WORKGROUP_SIZE.x - 1));
+
+	storePixel.x = fixedInvocationId.x + xOffset;
+
+	Vec4 missingColor = (outColor + s_pixels[readPixel.y][readPixel.x]) * 0.5; // average
+
+	// Store both the pixels
+	imageStore(out_img, fixedInvocationId, outColor);
+	imageStore(out_img, storePixel, missingColor);
 }
 #pragma anki end

+ 13 - 5
src/anki/renderer/Ssr.cpp

@@ -48,9 +48,16 @@ Error Ssr::initInternal(const ConfigSet& cfg)
 	consts.add("MAX_STEPS", U32(64));
 	consts.add("LIGHT_BUFFER_MIP_COUNT", U32(m_r->getDownscaleBlur().getMipmapCount()));
 
+	ShaderProgramResourceMutationInitList<1> mutators(m_prog);
+	mutators.add("VARIANT", 0);
+
 	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(consts.get(), variant);
-	m_grProg = variant->getProgram();
+	m_prog->getOrCreateVariant(mutators.get(), consts.get(), variant);
+	m_grProg[0] = variant->getProgram();
+
+	mutators[0].m_value = 1;
+	m_prog->getOrCreateVariant(mutators.get(), consts.get(), variant);
+	m_grProg[1] = variant->getProgram();
 
 	return Error::NONE;
 }
@@ -81,7 +88,7 @@ void Ssr::run(RenderPassWorkContext& rgraphCtx)
 {
 	RenderingContext& ctx = *m_runCtx.m_ctx;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
-	cmdb->bindShaderProgram(m_grProg);
+	cmdb->bindShaderProgram(m_grProg[m_r->getFrameCount() & 1u]);
 
 	// Bind textures
 	rgraphCtx.bindColorTextureAndSampler(0, 0, m_r->getGBuffer().getColorRt(1), m_r->getLinearSampler());
@@ -104,8 +111,9 @@ void Ssr::run(RenderPassWorkContext& rgraphCtx)
 	unis->m_normalMat = Mat3x4(ctx.m_renderQueue->m_viewMatrix.getRotationPart());
 
 	// Dispatch
-	dispatchPPCompute(
-		cmdb, m_workgroupSize[0], m_workgroupSize[1], m_r->getWidth() / SSR_FRACTION, m_r->getHeight() / SSR_FRACTION);
+	const U sizeX = (m_r->getWidth() / SSR_FRACTION + m_workgroupSize[0] - 1) / m_workgroupSize[0];
+	const U sizeY = (m_r->getHeight() / SSR_FRACTION + m_workgroupSize[1] - 1) / m_workgroupSize[1];
+	cmdb->dispatchCompute(sizeX / 2, sizeY, 1);
 }
 
 } // end namespace anki

+ 1 - 1
src/anki/renderer/Ssr.h

@@ -36,7 +36,7 @@ anki_internal:
 
 private:
 	ShaderProgramResourcePtr m_prog;
-	ShaderProgramPtr m_grProg;
+	Array<ShaderProgramPtr, 2> m_grProg;
 
 	RenderTargetDescription m_rtDescr;