Browse Source

Fix bugs and optimize

Panagiotis Christopoulos Charitos 7 years ago
parent
commit
58fed9ff94

+ 5 - 0
samples/common/Framework.cpp

@@ -150,6 +150,11 @@ Error SampleApp::userMainLoop(Bool& quit)
 		mover->rotateLocalZ(-ROTATE_ANGLE);
 	}
 
+	if(in.getKey(KeyCode::F12) == 1)
+	{
+		CoreTracerSingleton::get().m_enabled = !CoreTracerSingleton::get().m_enabled;
+	}
+
 	if(in.getMousePosition() != Vec2(0.0))
 	{
 		F32 angY = -ROTATE_ANGLE * in.getMousePosition().x() * MOUSE_SENSITIVITY * getMainRenderer().getAspectRatio();

+ 36 - 0
shaders/ClusteredShadingCommon.glsl

@@ -111,3 +111,39 @@ layout(std430, ANKI_SS_BINDING(LIGHT_SET, LIGHT_SS_BINDING + 1)) readonly buffer
 {
 	U32 u_lightIndices[];
 };
+
+// Debugging function: turns the object counts stored at u_lightIndices[firstIndex...] into a heatmap color. maxLights is the count that saturates the heatmap; the Bools select which object types are counted.
+Vec3 lightHeatmap(U32 firstIndex, U32 maxLights, Bool decals, Bool plights, Bool slights, Bool probes)
+{
+	U32 count = 0u;
+
+	U32 decalCount = u_lightIndices[firstIndex];
+	firstIndex += decalCount + 1u; // Step over the count entry plus decalCount entries
+	if(decals)
+	{
+		count += decalCount;
+	}
+
+	U32 pointLightCount = u_lightIndices[firstIndex];
+	firstIndex += pointLightCount + 1u; // Step over the count entry plus pointLightCount entries
+	if(plights)
+	{
+		count += pointLightCount;
+	}
+
+	U32 spotLightCount = u_lightIndices[firstIndex];
+	firstIndex += spotLightCount + 1u; // Step over the count entry plus spotLightCount entries
+	if(slights)
+	{
+		count += spotLightCount;
+	}
+
+	U32 probeCount = u_lightIndices[firstIndex]; // Last count that is read, so firstIndex is not advanced again
+	if(probes)
+	{
+		count += probeCount;
+	}
+
+	F32 factor = min(1.0, F32(count) / F32(max(maxLights, 1u))); // Clamp the divisor so maxLights == 0 cannot produce a 0/0 NaN
+	return heatmap(factor);
+}

+ 2 - 24
shaders/LightShading.glslp

@@ -130,6 +130,8 @@ void main()
 			k * (CLUSTER_COUNT_X * CLUSTER_COUNT_Y) + U32(in_clusterIJ.y) * CLUSTER_COUNT_X + U32(in_clusterIJ.x);
 
 		idxOffset = u_clusters[clusterIdx];
+
+		// out_color = lightHeatmap(idxOffset, 5, false, true, false, false); return;
 	}
 
 	// Decode GBuffer
@@ -212,29 +214,5 @@ void main()
 
 		out_color += indirectColor * gbuffer.m_diffuse + finalRefl * env;
 	}
-
-#if 0
-	count = scount;
-	if(count == 0)
-	{
-		out_color = Vec3(0.0, 0.0, 0.0);
-	}
-	else if(count == 1)
-	{
-		out_color = Vec3(1.0, 0.0, 0.0);
-	}
-	else if(count == 2)
-	{
-		out_color = Vec3(0.0, 1.0, 0.0);
-	}
-	else if(count == 3)
-	{
-		out_color = Vec3(0.0, 0.0, 1.0);
-	}
-	else
-	{
-		out_color = Vec3(1.0, 1.0, 1.0);
-	}
-#endif
 }
 #pragma anki end

+ 79 - 52
src/anki/renderer/ClusterBin.cpp

@@ -11,6 +11,7 @@
 #include <anki/collision/Tests.h>
 #include <anki/util/ThreadHive.h>
 #include <anki/core/Config.h>
+#include <anki/core/Trace.h>
 
 namespace anki
 {
@@ -20,23 +21,23 @@ static const F32 INVALID_TEXTURE_INDEX = -1.0;
 static const U32 MAX_TYPED_OBJECTS_PER_CLUSTER = 64;
 
 /// Get a view space point at depth zVspace from NDC coordinates and the unprojection params.
-static Vec4 unproject(const F32 zVspace, const Vec2& ndc, const Vec4& projParams)
+static Vec4 unproject(const F32 zVspace, const Vec2& ndc, const Vec4& unprojParams)
 {
 	Vec4 view;
-	Vec2 viewxy = ndc * projParams.xy() * zVspace;
-	view.x() = viewxy.x();
-	view.y() = viewxy.y();
-	view.z() = zVspace;
-	view.w() = 0.0;
+	view.x() = ndc.x() * unprojParams.x();
+	view.y() = ndc.y() * unprojParams.y();
+	view.z() = 1.0f; // Unit depth for now; the whole vector is scaled by zVspace on return
+	view.w() = 0.0f; // Stays zero after the scale below
 
-	return view;
+	return view * zVspace; // x, y and z all scale linearly with the view space depth
 }
 
 /// https://bartwronski.com/2017/04/13/cull-that-cone/
 static Bool testConeVsSphere(
-	const Vec3& coneOrigin, const Vec3& coneDir, F32 coneLength, F32 coneAngle, const Sphere& sphere)
+	const Vec4& coneOrigin, const Vec4& coneDir, F32 coneLength, F32 coneAngle, const Sphere& sphere)
 {
-	const Vec3 V = sphere.getCenter().xyz() - coneOrigin;
+	ANKI_ASSERT(coneOrigin.w() == 0.0f && sphere.getCenter().w() == 0.0f && coneDir.w() == 0.0f);
+	const Vec4 V = sphere.getCenter() - coneOrigin;
 	const F32 VlenSq = V.dot(V);
 	const F32 V1len = V.dot(coneDir);
 	const F32 distanceClosestPoint = cos(coneAngle) * sqrt(VlenSq - V1len * V1len) - V1len * sin(coneAngle);
@@ -67,10 +68,20 @@ public:
 	Atomic<U32> m_allocatedIndexCount = {TYPED_OBJECT_COUNT};
 
 	Vec4 m_unprojParams;
+
+	Bool m_clusterEdgesVSpaceDirty;
 };
 
-void ClusterBin::init(U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ, const ConfigSet& cfg)
+ClusterBin::~ClusterBin()
+{
+	m_clusterEdgesVSpace.destroy(m_alloc); // Free the cached view space edges with the allocator stored by init()
+}
+
+void ClusterBin::init(
+	HeapAllocator<U8> alloc, U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ, const ConfigSet& cfg)
 {
+	m_alloc = alloc;
+
 	m_clusterCounts[0] = clusterCountX;
 	m_clusterCounts[1] = clusterCountY;
 	m_clusterCounts[2] = clusterCountZ;
@@ -78,16 +89,22 @@ void ClusterBin::init(U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ, c
 	m_totalClusterCount = clusterCountX * clusterCountY * clusterCountZ;
 
 	m_indexCount = m_totalClusterCount * cfg.getNumber("r.avgObjectsPerCluster");
+
+	m_clusterEdgesVSpace.create(m_alloc, m_totalClusterCount * 8);
 }
 
 void ClusterBin::binToClustersCallback(
 	void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore)
 {
 	ANKI_ASSERT(userData);
+
+	ANKI_TRACE_SCOPED_EVENT(R_BIN_TO_CLUSTERS);
 	BinCtx& ctx = *static_cast<BinCtx*>(userData);
 
-	while(ctx.m_bin->processNextCluster(ctx))
+	U clusterIdx; // Receives the counter value returned by each fetchAdd, i.e. the next cluster to bin
+	while((clusterIdx = ctx.m_clusterIdxToProcess.fetchAdd(1)) < ctx.m_bin->m_totalClusterCount) // Valid indices are [0, m_totalClusterCount)
 	{
+		ctx.m_bin->binCluster(clusterIdx, ctx);
 	}
 }
 
@@ -95,6 +112,8 @@ void ClusterBin::writeTypedObjectsToGpuBuffersCallback(
 	void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore)
 {
 	ANKI_ASSERT(userData);
+
+	ANKI_TRACE_SCOPED_EVENT(R_WRITE_LIGHT_BUFFERS);
 	BinCtx& ctx = *static_cast<BinCtx*>(userData);
 	ctx.m_bin->writeTypedObjectsToGpuBuffers(ctx);
 }
@@ -108,6 +127,16 @@ void ClusterBin::bin(ClusterBinIn& in, ClusterBinOut& out)
 
 	prepare(ctx);
 
+	if(ctx.m_unprojParams != m_prevUnprojParams)
+	{
+		ctx.m_clusterEdgesVSpaceDirty = true;
+		m_prevUnprojParams = ctx.m_unprojParams;
+	}
+	else
+	{
+		ctx.m_clusterEdgesVSpaceDirty = false;
+	}
+
 	// Allocate indices
 	U32* indices = static_cast<U32*>(ctx.m_in->m_stagingMem->allocateFrame(
 		m_indexCount * sizeof(U32), StagingGpuMemoryType::STORAGE, ctx.m_out->m_indicesToken));
@@ -190,60 +219,60 @@ void ClusterBin::prepare(BinCtx& ctx)
 	ctx.m_unprojParams = ctx.m_in->m_renderQueue->m_projectionMatrix.extractPerspectiveUnprojectionParams();
 }
 
-Bool ClusterBin::processNextCluster(BinCtx& ctx) const
+void ClusterBin::binCluster(U32 clusterIdx, BinCtx& ctx)
 {
-	const U clusterIdx = ctx.m_clusterIdxToProcess.fetchAdd(1);
-	if(clusterIdx >= m_totalClusterCount)
-	{
-		// Done
-		return false;
-	}
-
 	// Get the cluster indices
 	U clusterX, clusterY, clusterZ;
 	unflatten3dArrayIndex(
 		m_clusterCounts[2], m_clusterCounts[1], m_clusterCounts[0], clusterIdx, clusterZ, clusterY, clusterX);
 
 	// Compute the cluster edges in vspace
-	Array<Vec4, 8> clusterEdgesVSpace;
-
-	const F32 zNear = -computeClusterNear(ctx.m_out->m_shaderMagicValues, clusterZ);
-	const F32 zFar = -computeClusterFar(ctx.m_out->m_shaderMagicValues, clusterZ);
-	ANKI_ASSERT(zNear > zFar);
-
-	const Vec2 tileSize = 2.0f / Vec2(m_clusterCounts[0], m_clusterCounts[1]);
-	const Vec2 startNdc = Vec2(F32(clusterX) / m_clusterCounts[0], F32(clusterY) / m_clusterCounts[1]) * 2.0f - 1.0f;
-
-	const Vec4& unprojParams = ctx.m_unprojParams;
-	clusterEdgesVSpace[0] = unproject(zNear, startNdc, unprojParams);
-	clusterEdgesVSpace[1] = unproject(zNear, startNdc + Vec2(tileSize.x(), 0.0f), unprojParams);
-	clusterEdgesVSpace[2] = unproject(zNear, startNdc + Vec2(0.0f, tileSize.y()), unprojParams);
-	clusterEdgesVSpace[3] = unproject(zNear, startNdc + tileSize, unprojParams);
-	clusterEdgesVSpace[4] = unproject(zFar, startNdc, unprojParams);
-	clusterEdgesVSpace[5] = unproject(zFar, startNdc + Vec2(tileSize.x(), 0.0f), unprojParams);
-	clusterEdgesVSpace[6] = unproject(zFar, startNdc + Vec2(0.0f, tileSize.y()), unprojParams);
-	clusterEdgesVSpace[7] = unproject(zFar, startNdc + tileSize, unprojParams);
-
-	// Move the cluster edges to wspace
-	Array<Vec4, 8> clusterEdgesWSpace;
-	for(U i = 0; i < 8; ++i)
+	Vec4* clusterEdgesVSpace = &m_clusterEdgesVSpace[clusterIdx * 8];
+	if(ANKI_UNLIKELY(ctx.m_clusterEdgesVSpaceDirty))
 	{
-		clusterEdgesWSpace[i] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[i];
+		const F32 zNear = -computeClusterNear(ctx.m_out->m_shaderMagicValues, clusterZ);
+		const F32 zFar = -computeClusterFar(ctx.m_out->m_shaderMagicValues, clusterZ);
+		ANKI_ASSERT(zNear > zFar);
+
+		const Vec2 tileSize = 2.0f / Vec2(m_clusterCounts[0], m_clusterCounts[1]);
+		const Vec2 startNdc =
+			Vec2(F32(clusterX) / m_clusterCounts[0], F32(clusterY) / m_clusterCounts[1]) * 2.0f - 1.0f;
+
+		const Vec4& unprojParams = ctx.m_unprojParams;
+		clusterEdgesVSpace[0] = unproject(zNear, startNdc, unprojParams).xyz1();
+		clusterEdgesVSpace[1] = unproject(zNear, startNdc + Vec2(tileSize.x(), 0.0f), unprojParams).xyz1();
+		clusterEdgesVSpace[2] = unproject(zNear, startNdc + Vec2(0.0f, tileSize.y()), unprojParams).xyz1();
+		clusterEdgesVSpace[3] = unproject(zNear, startNdc + tileSize, unprojParams).xyz1();
+		clusterEdgesVSpace[4] = unproject(zFar, startNdc, unprojParams).xyz1();
+		clusterEdgesVSpace[5] = unproject(zFar, startNdc + Vec2(tileSize.x(), 0.0f), unprojParams).xyz1();
+		clusterEdgesVSpace[6] = unproject(zFar, startNdc + Vec2(0.0f, tileSize.y()), unprojParams).xyz1();
+		clusterEdgesVSpace[7] = unproject(zFar, startNdc + tileSize, unprojParams).xyz1();
 	}
 
+	// Transform the cluster edges to wspace
+	Array<Vec4, 8> clusterEdgesWSpace;
+	clusterEdgesWSpace[0] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[0];
+	clusterEdgesWSpace[1] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[1];
+	clusterEdgesWSpace[2] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[2];
+	clusterEdgesWSpace[3] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[3];
+	clusterEdgesWSpace[4] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[4];
+	clusterEdgesWSpace[5] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[5];
+	clusterEdgesWSpace[6] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[6];
+	clusterEdgesWSpace[7] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[7];
+
 	// Compute an AABB and a sphere that contains the cluster
-	Vec3 aabbMin(MAX_F32);
-	Vec3 aabbMax(MIN_F32);
+	Vec4 aabbMin(MAX_F32, MAX_F32, MAX_F32, 0.0f);
+	Vec4 aabbMax(MIN_F32, MIN_F32, MIN_F32, 0.0f);
 	for(U i = 0; i < 8; ++i)
 	{
-		aabbMin = aabbMin.min(clusterEdgesWSpace[i].xyz());
-		aabbMax = aabbMax.max(clusterEdgesWSpace[i].xyz());
+		aabbMin = aabbMin.min(clusterEdgesWSpace[i]);
+		aabbMax = aabbMax.max(clusterEdgesWSpace[i]);
 	}
 
 	const Aabb clusterBox(aabbMin, aabbMax);
 
-	const Vec3 sphereCenter = (aabbMin + aabbMax) / 2.0f;
-	const Sphere clusterSphere(sphereCenter.xyz0(), (aabbMin - sphereCenter).getLength());
+	const Vec4 sphereCenter = (aabbMin + aabbMax) / 2.0f;
+	const Sphere clusterSphere(sphereCenter, (aabbMin - sphereCenter).getLength());
 
 	// Bin decals
 	Array<U32, MAX_TYPED_OBJECTS_PER_CLUSTER> objectIndices;
@@ -299,8 +328,8 @@ Bool ClusterBin::processNextCluster(BinCtx& ctx) const
 	++pObjectIndex;
 	for(const SpotLightQueueElement& slight : ctx.m_in->m_renderQueue->m_spotLights)
 	{
-		if(testConeVsSphere(slight.m_worldTransform.getTranslationPart().xyz(),
-			   slight.m_worldTransform.getZAxis().xyz(),
+		if(testConeVsSphere(slight.m_worldTransform.getTranslationPart(),
+			   -slight.m_worldTransform.getZAxis(),
 			   slight.m_distance,
 			   slight.m_outerAngle,
 			   clusterSphere))
@@ -366,8 +395,6 @@ Bool ClusterBin::processNextCluster(BinCtx& ctx) const
 
 	// Write the cluster
 	ctx.m_clusters[clusterIdx] = firstIndex;
-
-	return true;
 }
 
 void ClusterBin::writeTypedObjectsToGpuBuffers(BinCtx& ctx) const

+ 9 - 2
src/anki/renderer/ClusterBin.h

@@ -52,20 +52,27 @@ public:
 class ClusterBin
 {
 public:
-	void init(U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ, const ConfigSet& cfg);
+	~ClusterBin();
+
+	void init(HeapAllocator<U8> alloc, U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ, const ConfigSet& cfg);
 
 	void bin(ClusterBinIn& in, ClusterBinOut& out);
 
 private:
 	class BinCtx;
 
+	HeapAllocator<U8> m_alloc;
+
 	Array<U32, 3> m_clusterCounts = {};
 	U32 m_totalClusterCount = 0;
 	U32 m_indexCount = 0;
 
+	DynamicArray<Vec4> m_clusterEdgesVSpace; ///< Cache those for opt.
+	Vec4 m_prevUnprojParams = Vec4(0.0f); ///< To check if m_clusterEdgesVSpace is dirty.
+
 	void prepare(BinCtx& ctx);
 
-	Bool processNextCluster(BinCtx& ctx) const;
+	void binCluster(U32 clusterIdx, BinCtx& ctx);
 
 	void writeTypedObjectsToGpuBuffers(BinCtx& ctx) const;
 

+ 1 - 1
src/anki/renderer/Renderer.cpp

@@ -86,7 +86,7 @@ Error Renderer::initInternal(const ConfigSet& config)
 	m_clusterCount[2] = config.getNumber("r.clusterSizeZ");
 	m_clusterCount[3] = m_clusterCount[0] * m_clusterCount[1] * m_clusterCount[2];
 
-	m_clusterBin.init(m_clusterCount[0], m_clusterCount[1], m_clusterCount[2], config);
+	m_clusterBin.init(m_alloc, m_clusterCount[0], m_clusterCount[1], m_clusterCount[2], config);
 
 	// A few sanity checks
 	if(m_width < 10 || m_height < 10)

+ 1 - 0
src/anki/util/Functions.h

@@ -221,6 +221,7 @@ constexpr Bool isPacked()
 /// indices.
 inline void unflatten3dArrayIndex(const U sizeA, const U sizeB, const U sizeC, const U flatIdx, U& a, U& b, U& c)
 {
+	ANKI_ASSERT(flatIdx < (sizeA * sizeB * sizeC));
 	a = (flatIdx / (sizeB * sizeC)) % sizeA;
 	b = (flatIdx / sizeC) % sizeB;
 	c = flatIdx % sizeC;