Browse Source

Optimizations & reflection work

Panagiotis Christopoulos Charitos 10 years ago
parent
commit
3977900159

+ 4 - 2
include/anki/renderer/Clusterer.h

@@ -83,9 +83,10 @@ public:
 	void bin(const CollisionShape& cs, const Aabb& csBox,
 		ClustererTestResult& rez) const;
 
-	F32 getDivisor() const
+	/// A value that will be used in shaders to calculate the cluster index.
+	F32 getShaderMagicValue() const
 	{
-		return m_calcNearOpt;
+		return m_shaderMagicVal;
 	}
 
 	U getClusterCountX() const
@@ -134,6 +135,7 @@ public:
 	F32 m_near = 0.0;
 	F32 m_far = 0.0;
 	F32 m_calcNearOpt = 0.0;
+	F32 m_shaderMagicVal = 0.0;
 
 	F32 calcNear(U k) const;
 

+ 5 - 0
include/anki/renderer/Ir.h

@@ -59,6 +59,11 @@ anki_internal:
 		return m_cubemapArrMipCount;
 	}
 
+	const Clusterer& getClusterer() const
+	{
+		return m_clusterer;
+	}
+
 private:
 	class CacheEntry
 	{

+ 1 - 0
include/anki/renderer/Renderer.h

@@ -111,6 +111,7 @@ public:
 		Array<CommandBufferPtr, RENDERER_COMMAND_BUFFERS_COUNT>& cmdBuff);
 
 anki_internal:
+	/// WARNING: If you change the tile size you also need to update the shaders that hard-code it (e.g. the `>> 6` shift in shaders/Clusterer.glsl)
 	static const U TILE_SIZE = 64;
 
 	void getOutputFramebuffer(FramebufferPtr& outputFb, U32& width, U32& height)

+ 0 - 4
shaders/Clusterer.glsl

@@ -26,10 +26,6 @@ uint computeClusterIndexUsingTileIdx(float near, float clustererMagic,
 uint computeClusterIndexUsingFragCoord(float near, float clustererMagic,
 	float zVSpace, uint tileCountX)
 {
-#if TILE_SIZE != 64
-#	error Not designed for this tile size
-#endif
-
 	// Compute tile idx
 	uvec2 f = uvec2(gl_FragCoord.xy) >> 6;
 	uint tileIdx = f.y * tileCountX + f.x;

+ 9 - 9
shaders/FsCommonFrag.glsl

@@ -7,6 +7,7 @@
 #pragma anki include "shaders/Common.glsl"
 #pragma anki include "shaders/MsFsCommon.glsl"
 #pragma anki include "shaders/LinearDepth.glsl"
+#pragma anki include "shaders/Clusterer.glsl"
 
 // Global resources
 layout(TEX_BINDING(1, 0)) uniform sampler2D anki_msDepthRt;
@@ -151,14 +152,13 @@ vec3 computeLightColor(vec3 diffCol)
 	uint pointLightsCount;
 	uint spotLightsCount;
 	{
-		uint k = calcClusterSplit(fragPos.z);
+		uint clusterIdx = computeClusterIndexUsingFragCoord(
+			u_lightingUniforms.nearFarClustererMagicPad1.x,
+			u_lightingUniforms.nearFarClustererMagicPad1.z,
+			fragPos.z,
+			u_lightingUniforms.tileCountPad1.x);
 
-		vec2 tilef = gl_FragCoord.xy / float(TILE_SIZE);
-		uint tile = uint(tilef.y) * u_lightingUniforms.tileCountPad1.x
-			+ uint(tilef.x);
-
-		uint cluster =
-			u_clusters[tile + k * u_lightingUniforms.tileCountPad1.z];
+		uint cluster = u_clusters[clusterIdx];
 
 		lightOffset = cluster >> 16u;
 		pointLightsCount = (cluster >> 8u) & 0xFFu;
@@ -276,8 +276,8 @@ void fog(in sampler2D depthMap, in vec3 color, in float fogScale)
 
 	if(depth < 1.0)
 	{
-		float zNear = u_lightingUniforms.nearFarClustererDivisor.x;
-		float zFar = u_lightingUniforms.nearFarClustererDivisor.y;
+		float zNear = u_lightingUniforms.nearFarClustererMagicPad1.x;
+		float zFar = u_lightingUniforms.nearFarClustererMagicPad1.y;
 		vec2 linearDepths = (2.0 * zNear)
 			/ (zFar + zNear - vec2(depth, gl_FragCoord.z) * (zFar - zNear));
 

+ 7 - 30
shaders/ImageReflections.glsl

@@ -8,7 +8,7 @@
 #ifndef ANKI_SHADERS_IMAGE_REFLECTIONS_GLSL
 #define ANKI_SHADERS_IMAGE_REFLECTIONS_GLSL
 
-#pragma anki include "shaders/Common.glsl"
+#pragma anki include "shaders/Clusterer.glsl"
 
 // Representation of a reflection probe
 struct ReflectionProbe
@@ -24,7 +24,7 @@ layout(std140, row_major, SS_BINDING(IMAGE_REFLECTIONS_SET,
 	IMAGE_REFLECTIONS_FIRST_SS_BINDING)) readonly buffer _irs1
 {
 	mat3 u_invViewRotation;
-	vec4 u_nearClusterDivisorPad2;
+	vec4 u_nearClusterMagicPad2;
 	ReflectionProbe u_reflectionProbes[];
 };
 
@@ -110,37 +110,14 @@ vec3 readReflection(in uint clusterIndex, in vec3 posVSpace,
 	return color;
 }
 
-//==============================================================================
-uint computeClusterIndex(in vec3 posVSpace)
-{
-#if TILE_SIZE == 64
-	// Compute tile idx
-	uint tileX = uint(gl_FragCoord.x) >> 6;
-	uint tileY = uint(gl_FragCoord.y) >> 6;
-
-	const uint TILE_COUNT_X = (WIDTH / TILE_SIZE);
-	uint tileIdx = tileY * TILE_COUNT_X + tileX;
-
-	// Calc split
-	float zVspace = -posVSpace.z;
-	float fk = sqrt(
-		(zVspace - u_nearClusterDivisorPad2.x) / u_nearClusterDivisorPad2.y);
-	uint k = uint(fk);
-
-	// Finally
-	const uint TILE_COUNT = TILE_COUNT_X * (HEIGHT / TILE_SIZE);
-	uint clusterIdx = tileIdx + k * TILE_COUNT;
-
-	return clusterIdx;
-#else
-#	error Not designed for this tile size
-#endif
-}
-
 //==============================================================================
 vec3 doImageReflections(in vec3 posVSpace, in vec3 r, in float lod)
 {
-	uint clusterIdx = computeClusterIndex(posVSpace);
+	uint clusterIdx = computeClusterIndexUsingFragCoord(
+		u_nearClusterMagicPad2.x,
+		u_nearClusterMagicPad2.y,
+		posVSpace.z,
+		TILE_COUNT_X);
 	return readReflection(clusterIdx, posVSpace, r, lod);
 }
 

+ 9 - 4
shaders/IsLp.frag.glsl

@@ -5,6 +5,7 @@
 
 #pragma anki type frag
 #pragma anki include "shaders/Pack.glsl"
+#pragma anki include "shaders/Clusterer.glsl"
 
 #define LIGHT_SET 0
 #define LIGHT_SS_BINDING 0
@@ -27,7 +28,7 @@ layout(location = 0) out vec3 out_color;
 
 #pragma anki include "shaders/LightFunctions.glsl"
 
-const uint TILE_COUNT = TILES_COUNT_X * TILES_COUNT_Y;
+const uint TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y;
 
 //==============================================================================
 // Return frag pos in view space
@@ -93,9 +94,13 @@ void main()
 		+ diffCol * emission;
 
 	// Get counts and offsets
-	uint k = calcClusterSplit(fragPos.z);
-	uint clusterIndex = in_instanceId + k * TILE_COUNT;
-	uint cluster = u_clusters[clusterIndex];
+	uint clusterIdx = computeClusterIndexUsingTileIdx(
+		u_lightingUniforms.nearFarClustererMagicPad1.x,
+		u_lightingUniforms.nearFarClustererMagicPad1.z,
+		fragPos.z,
+		in_instanceId);
+
+	uint cluster = u_clusters[clusterIdx];
 	uint lightOffset = cluster >> 16u;
 	uint pointLightsCount = (cluster >> 8u) & 0xFFu;
 	uint spotLightsCount = cluster & 0xFFu;

+ 3 - 3
shaders/IsLp.vert.glsl

@@ -25,13 +25,13 @@ void main()
 	float instIdF = float(gl_InstanceID);
 
 	vec2 ij = vec2(
-		mod(instIdF, float(TILES_COUNT_X)),
-		floor(instIdF / float(TILES_COUNT_X)));
+		mod(instIdF, float(TILE_COUNT_X)),
+		floor(instIdF / float(TILE_COUNT_X)));
 
 	out_instanceId = int(gl_InstanceID);
 
 	const vec2 SIZES =
-		vec2(1.0 / float(TILES_COUNT_X), 1.0 / float(TILES_COUNT_Y));
+		vec2(1.0 / float(TILE_COUNT_X), 1.0 / float(TILE_COUNT_Y));
 
 	const vec2 UVS[4] = vec2[](
 		vec2(0.0, 0.0) * SIZES,

+ 0 - 12
shaders/LightFunctions.glsl

@@ -15,18 +15,6 @@ const float OMNI_LIGHT_FRUSTUM_NEAR_PLANE = 0.1 / 4.0;
 
 const uint SHADOW_SAMPLE_COUNT = 16;
 
-//==============================================================================
-/// Calculate the cluster split
-uint calcClusterSplit(float zVspace)
-{
-	zVspace = -zVspace;
-	float fk = sqrt(
-		(zVspace - u_lightingUniforms.nearFarClustererDivisor.x)
-		/ u_lightingUniforms.nearFarClustererDivisor.z);
-	uint k = uint(fk);
-	return k;
-}
-
 //==============================================================================
 float computeAttenuationFactor(float lightRadius, vec3 frag2Light)
 {

+ 1 - 1
shaders/LightResources.glsl

@@ -14,7 +14,7 @@ struct LightingUniforms
 	vec4 projectionParams;
 	vec4 sceneAmbientColor;
 	vec4 rendererSizeTimePad1;
-	vec4 nearFarClustererDivisor;
+	vec4 nearFarClustererMagicPad1;
 	mat4 viewMat;
 	uvec4 tileCountPad1;
 };

+ 33 - 22
shaders/Refl.frag.glsl

@@ -40,43 +40,54 @@ layout(location = 0) out vec3 out_color;
 
 void main()
 {
-
-#if 0
-	#if IR == 1
-	{
-		float reflLod = float(IR_MIPMAP_COUNT) * roughness;
-		vec3 refl = readReflection(clusterIndex, fragPos, normal, reflLod);
-		out_color += refl * (1.0 - roughness);
-	}
-#endif
-
-// Don't bother for very rough surfaces
-	if(roughness > SSLR_START_ROUGHNESS)
-	{
-		contribution = 0.0;
-		return vec3(0.0);
-	}
-
+	//
 	// Decode the G-buffer
-	float depth = textureRt(u_msDepthRt, in_texCoord).r;
+	//
+	float depth = textureRt(u_depthRt, in_texCoord).r;
 	vec3 posVSpace;
 	posVSpace.z = u_projectionParams.z / (u_projectionParams.w + depth);
 	posVSpace.xy =
 		(2.0 * in_texCoord - 1.0) * u_projectionParams.xy * posVSpace.z;
 
 	float roughness;
-	readRoughnessFromGBuffer(u_rt1, in_texCoord, roughness);
+	readRoughnessFromGBuffer(u_msRt1, in_texCoord, roughness);
 
 	vec3 normal;
-	readNormalFromGBuffer(u_rt2, in_texCoord, normal);
+	readNormalFromGBuffer(u_msRt2, in_texCoord, normal);
+
+	// Compute reflection vector
+	vec3 eye = normalize(posVSpace);
+	vec3 r = reflect(eye, normal);
 
-	// First the SSLR
+	//
+	// SSLR
+	//
 #if SSLR_ENABLED
+	float sslrContribution;
 
+	// Don't bother for very rough surfaces
+	if(roughness > SSLR_START_ROUGHNESS)
+	{
+		sslrContribution = 1.0;
+		out_color = vec3(1.0, 0.0, 1.0);
+	}
+	else
+	{
+		sslrContribution = 0.0;
+	}
 #else
+	const float sslrContribution = 0.0; // SSLR compiled out: zero SSLR weight in the mix() below
 #endif
 
+	//
+	// IR
+	//
+#if IR_ENABLED
+	float reflLod = float(IR_MIPMAP_COUNT) * roughness;
+	vec3 imgRefl = doImageReflections(posVSpace, r, reflLod);
+	out_color = mix(imgRefl, out_color, sslrContribution);
 #endif
 
-	out_color = vec3(0.0, 0.0, 1.0);
+	//out_color *= (1.0 - roughness);
+	out_color = vec3(0.0);
 }

+ 1 - 1
src/core/App.cpp

@@ -260,7 +260,7 @@ Error App::createInternal(const ConfigSet& config_,
 	rinit.m_cacheDir = m_cacheDir.toCString();
 	rinit.m_allocCallback = m_allocCb;
 	rinit.m_allocCallbackData = m_allocCbData;
-	rinit.m_tempAllocatorMemorySize = 1024 * 1024 * 4;
+	rinit.m_tempAllocatorMemorySize = 1024 * 1024 * 5;
 	m_resources = m_heapAlloc.newInstance<ResourceManager>();
 
 	ANKI_CHECK(m_resources->create(rinit));

+ 2 - 0
src/renderer/Clusterer.cpp

@@ -169,6 +169,8 @@ void Clusterer::prepare(ThreadPool& threadPool, const FrustumComponent& frc)
 	m_near = pfr.getNear();
 	m_far = pfr.getFar();
 	m_calcNearOpt = (m_far - m_near) / pow(m_counts[2], 2.0);
+	U countXY = m_counts[0] * m_counts[1];
+	m_shaderMagicVal = -F32(countXY * countXY) / m_calcNearOpt; // Convert to F32 before negating so the sign survives if U is unsigned
 
 	//
 	// Issue parallel jobs

+ 1 - 1
src/renderer/Ir.cpp

@@ -279,7 +279,7 @@ Error Ir::run(CommandBufferPtr cmdb)
 
 	Vec4* nearClusterDivisor = reinterpret_cast<Vec4*>(invViewRotation + 1);
 	nearClusterDivisor->x() = frc.getFrustum().getNear();
-	nearClusterDivisor->y() = m_clusterer.getDivisor();
+	nearClusterDivisor->y() = m_clusterer.getShaderMagicValue();
 	nearClusterDivisor->z() = 0.0;
 	nearClusterDivisor->w() = 0.0;
 

+ 5 - 5
src/renderer/Is.cpp

@@ -53,7 +53,7 @@ struct ShaderCommonUniforms
 	Vec4 m_projectionParams;
 	Vec4 m_sceneAmbientColor;
 	Vec4 m_rendererSizeTimePad1;
-	Vec4 m_nearFarClustererDivisor;
+	Vec4 m_nearFarClustererMagicPad1;
 	Mat4 m_viewMat;
 	UVec4 m_tileCount;
 };
@@ -239,8 +239,8 @@ Error Is::initInternal(const ConfigSet& config)
 	StringAuto pps(getAllocator());
 
 	pps.sprintf(
-		"\n#define TILES_COUNT_X %u\n"
-		"#define TILES_COUNT_Y %u\n"
+		"\n#define TILE_COUNT_X %u\n"
+		"#define TILE_COUNT_Y %u\n"
 		"#define CLUSTER_COUNT %u\n"
 		"#define RENDERER_WIDTH %u\n"
 		"#define RENDERER_HEIGHT %u\n"
@@ -775,10 +775,10 @@ void Is::updateCommonBlock(CommandBufferPtr& cmdb, const FrustumComponent& fr)
 	blk->m_projectionParams = m_r->getProjectionParameters();
 	blk->m_sceneAmbientColor = m_ambientColor;
 	blk->m_viewMat = fr.getViewMatrix().getTransposed();
-	blk->m_nearFarClustererDivisor = Vec4(
+	blk->m_nearFarClustererMagicPad1 = Vec4(
 		fr.getFrustum().getNear(),
 		fr.getFrustum().getFar(),
-		m_r->getClusterer().getDivisor(),
+		m_r->getClusterer().getShaderMagicValue(),
 		0.0);
 
 	blk->m_rendererSizeTimePad1 = Vec4(m_r->getWidth(), m_r->getHeight(),

+ 4 - 0
src/renderer/Refl.cpp

@@ -90,6 +90,8 @@ Error Refl::init1stPass(const ConfigSet& config)
 	pps.sprintf(
 		"#define WIDTH %u\n"
 		"#define HEIGHT %u\n"
+		"#define TILE_COUNT_X %u\n"
+		"#define TILE_COUNT_Y %u\n"
 		"#define SSLR_ENABLED %u\n"
 		"#define IR_ENABLED %u\n"
 		"#define IR_MIPMAP_COUNT %u\n"
@@ -97,6 +99,8 @@ Error Refl::init1stPass(const ConfigSet& config)
 		"#define SSLR_START_ROUGHNESS %f\n",
 		m_width,
 		m_height,
+		(m_irEnabled) ? m_ir->getClusterer().getClusterCountX() : 0,
+		(m_irEnabled) ? m_ir->getClusterer().getClusterCountY() : 0,
 		U(m_sslrEnabled),
 		U(m_irEnabled),
 		(m_irEnabled) ? m_ir->getCubemapArrayMipmapCount() : 0,