Browse Source

Finalize the cluster bin

Panagiotis Christopoulos Charitos 7 years ago
parent
commit
1260e10b70

+ 1 - 1
src/anki/core/Config.cpp

@@ -18,7 +18,7 @@ Config::Config()
 	newOption("r.clusterSizeX", 32);
 	newOption("r.clusterSizeY", 26);
 	newOption("r.clusterSizeZ", 32);
-	newOption("r.maxLightsPerCluster", 8);
+	newOption("r.avgObjectsPerCluster", 8);
 
 	newOption("r.shadowMapping.enabled", true);
 	newOption("r.shadowMapping.resolution", 512);

+ 199 - 24
src/anki/renderer/ClusterBin.cpp

@@ -9,12 +9,17 @@
 #include <anki/collision/Sphere.h>
 #include <anki/collision/Functions.h>
 #include <anki/collision/Tests.h>
+#include <anki/util/ThreadHive.h>
+#include <anki/core/Config.h>
 
 namespace anki
 {
 
-const U TYPED_OBJECT_COUNT = 4; // Point, spot, decal & probe
+static const U32 TYPED_OBJECT_COUNT = 4; // Point, spot, decal & probe
+static const F32 INVALID_TEXTURE_INDEX = -1.0; // Stored in the w component of a light's color vector when the light has no shadow map
+static const U32 MAX_TYPED_OBJECTS_PER_CLUSTER = 64; // Capacity of the per-cluster scratch array of object indices in processNextCluster()
 
+/// Get a view space point.
 static Vec4 unproject(const F32 zVspace, const Vec2& ndc, const Vec4& projParams)
 {
 	Vec4 view;
@@ -27,6 +32,7 @@ static Vec4 unproject(const F32 zVspace, const Vec2& ndc, const Vec4& projParams
 	return view;
 }
 
+/// https://bartwronski.com/2017/04/13/cull-that-cone/
 static Bool testConeVsSphere(
 	const Vec3& coneOrigin, const Vec3& coneDir, F32 coneLength, F32 coneAngle, const Sphere& sphere)
 {
@@ -63,24 +69,34 @@ public:
 	Vec4 m_unprojParams;
 };
 
-ClusterBin::ClusterBin(
-	const GenericMemoryPoolAllocator<U8>& alloc, U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ)
-	: m_alloc(alloc)
-	, m_clusterCounts{{clusterCountX, clusterCountY, clusterCountZ}}
+void ClusterBin::init(U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ, const Config& cfg)
 {
 	// Store the cluster grid dimensions and derive the two sizes that bin() allocates with every frame.
+	m_clusterCounts[0] = clusterCountX;
+	m_clusterCounts[1] = clusterCountY;
+	m_clusterCounts[2] = clusterCountZ;
+
 	m_totalClusterCount = clusterCountX * clusterCountY * clusterCountZ;
+
 	// The index buffer is budgeted as an average number of objects per cluster, not a per-cluster maximum.
 	// NOTE(review): getNumber()'s return type is not visible here; if it is floating point the product is
 	// implicitly converted to U32 on assignment. Confirm.
+	m_indexCount = m_totalClusterCount * cfg.getNumber("r.avgObjectsPerCluster");
 }
 
-ClusterBin::~ClusterBin()
+void ClusterBin::binToClustersCallback(
+	void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore)
 {
 	// ThreadHive task entry point for binning. userData must point to the BinCtx of the current bin() call.
 	// threadId, hive and signalSemaphore are not used.
+	ANKI_ASSERT(userData);
+	BinCtx& ctx = *static_cast<BinCtx*>(userData);
+
 	// Keep claiming clusters until processNextCluster() reports that none are left. The loop body is
 	// intentionally empty: all the work happens inside the call.
+	while(ctx.m_bin->processNextCluster(ctx))
+	{
+	}
 }
 
-void ClusterBin::processNextClusterCallback(
+void ClusterBin::writeTypedObjectsToGpuBuffersCallback(
 	void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore)
 {
 	// ThreadHive task entry point. userData must point to the BinCtx of the current bin() call.
 	// threadId, hive and signalSemaphore are not used.
 	ANKI_ASSERT(userData);
 	BinCtx& ctx = *static_cast<BinCtx*>(userData);
-	ctx.m_bin->processNextCluster(ctx);
+	ctx.m_bin->writeTypedObjectsToGpuBuffers(ctx); // Runs once; fills the light, decal and probe buffers
 }
 
 void ClusterBin::bin(ClusterBinIn& in, ClusterBinOut& out)
@@ -94,13 +110,35 @@ void ClusterBin::bin(ClusterBinIn& in, ClusterBinOut& out)
 
 	// Allocate indices
 	U32* indices = static_cast<U32*>(ctx.m_in->m_stagingMem->allocateFrame(
-		ctx.m_in->m_maxLightIndices * sizeof(U32), StagingGpuMemoryType::STORAGE, ctx.m_out->m_indicesToken));
-	ctx.m_lightIds = WeakArray<U32>(indices, ctx.m_in->m_maxLightIndices);
+		m_indexCount * sizeof(U32), StagingGpuMemoryType::STORAGE, ctx.m_out->m_indicesToken));
+	ctx.m_lightIds = WeakArray<U32>(indices, m_indexCount);
+
+	// Reserve some indices for empty clusters
+	for(U i = 0; i < TYPED_OBJECT_COUNT; ++i)
+	{
+		indices[i] = 0;
+	}
 
 	// Allocate clusters
 	U32* clusters = static_cast<U32*>(ctx.m_in->m_stagingMem->allocateFrame(
 		sizeof(U32) * m_totalClusterCount, StagingGpuMemoryType::STORAGE, ctx.m_out->m_clustersToken));
 	ctx.m_clusters = WeakArray<U32>(clusters, m_totalClusterCount);
+
+	// Create task for writing GPU buffers
+	Array<ThreadHiveTask, ThreadHive::MAX_THREADS + 1> tasks;
+	tasks[0].m_callback = writeTypedObjectsToGpuBuffersCallback;
+	tasks[0].m_argument = &ctx;
+
+	// Create tasks for binning
+	for(U threadIdx = 0; threadIdx < in.m_threadHive->getThreadCount(); ++threadIdx)
+	{
+		tasks[threadIdx + 1].m_callback = binToClustersCallback;
+		tasks[threadIdx + 1].m_argument = &ctx;
+	}
+
+	// Submit and wait
+	in.m_threadHive->submitTasks(&tasks[0], in.m_threadHive->getThreadCount() + 1);
+	in.m_threadHive->waitAllTasks();
 }
 
 void ClusterBin::prepare(BinCtx& ctx)
@@ -152,13 +190,13 @@ void ClusterBin::prepare(BinCtx& ctx)
 	ctx.m_unprojParams = ctx.m_in->m_renderQueue->m_projectionMatrix.extractPerspectiveUnprojectionParams();
 }
 
-void ClusterBin::processNextCluster(BinCtx& ctx) const
+Bool ClusterBin::processNextCluster(BinCtx& ctx) const
 {
 	const U clusterIdx = ctx.m_clusterIdxToProcess.fetchAdd(1);
 	if(clusterIdx >= m_totalClusterCount)
 	{
 		// Done
-		return;
+		return false;
 	}
 
 	// Get the cluster indices
@@ -193,7 +231,7 @@ void ClusterBin::processNextCluster(BinCtx& ctx) const
 		clusterEdgesWSpace[i] = ctx.m_in->m_renderQueue->m_cameraTransform * clusterEdgesVSpace[i];
 	}
 
-	// Compute an AABB that contains the cluster
+	// Compute an AABB and a sphere that contains the cluster
 	Vec3 aabbMin(MAX_F32);
 	Vec3 aabbMax(MIN_F32);
 	for(U i = 0; i < 8; ++i)
@@ -208,7 +246,7 @@ void ClusterBin::processNextCluster(BinCtx& ctx) const
 	const Sphere clusterSphere(sphereCenter.xyz0(), (aabbMin - sphereCenter).getLength());
 
 	// Bin decals
-	Array<U32, 32> objectIndices;
+	Array<U32, MAX_TYPED_OBJECTS_PER_CLUSTER> objectIndices;
 	U32* pObjectIndex = &objectIndices[0];
 	const U32* pObjectIndexEnd = &objectIndices[0] + objectIndices.getSize();
 	(void)pObjectIndexEnd;
@@ -305,6 +343,8 @@ void ClusterBin::processNextCluster(BinCtx& ctx) const
 	U firstIndex;
 	if(indexCount > TYPED_OBJECT_COUNT)
 	{
+		// Have some objects to bin
+
 		firstIndex = ctx.m_allocatedIndexCount.fetchAdd(indexCount);
 
 		if(firstIndex + indexCount <= ctx.m_lightIds.getSize())
@@ -313,7 +353,7 @@ void ClusterBin::processNextCluster(BinCtx& ctx) const
 		}
 		else
 		{
-			ANKI_R_LOGW("XXX");
+			ANKI_R_LOGW("Out of cluster indices. Increase r.avgObjectsPerCluster");
 			firstIndex = 0;
 			indexCount = TYPED_OBJECT_COUNT;
 		}
@@ -326,29 +366,164 @@ void ClusterBin::processNextCluster(BinCtx& ctx) const
 
 	// Write the cluster
 	ctx.m_clusters[clusterIdx] = firstIndex;
+
+	return true;
 }
 
 /// Copy the visible point lights, spot lights, decals and reflection probes of the render queue into
 /// per-frame staging memory. For every object type with at least one visible element a UNIFORM allocation
 /// is made and its token is stored in the output; otherwise the corresponding token is marked as unused.
 /// For decals the shared atlas texture views are also written to the output.
 void ClusterBin::writeTypedObjectsToGpuBuffers(BinCtx& ctx) const
 {
 	const RenderQueue& rqueue = *ctx.m_in->m_renderQueue;
 
-	// Write point lights
-	const U visiblePointLightsCount = rqueue.m_pointLights.getSize();
-	if(visiblePointLightsCount)
+	// Write the point lights
+	const U visiblePointLightCount = rqueue.m_pointLights.getSize();
+	if(visiblePointLightCount)
 	{
-		PointLight* data =
-			static_cast<PointLight*>(ctx.m_in->m_stagingMem->allocateFrame(sizeof(PointLight) * visiblePointLightsCount,
-				StagingGpuMemoryType::UNIFORM,
-				ctx.m_out->m_pointLightsToken));
+		PointLight* data = static_cast<PointLight*>(ctx.m_in->m_stagingMem->allocateFrame(
+			sizeof(PointLight) * visiblePointLightCount, StagingGpuMemoryType::UNIFORM, ctx.m_out->m_pointLightsToken));
 
-		WeakArray<PointLight> gpuLights(data, visiblePointLightsCount);
+		WeakArray<PointLight> gpuLights(data, visiblePointLightCount);
 
-		// TODO
+		for(U i = 0; i < visiblePointLightCount; ++i)
+		{
+			const PointLightQueueElement& in = rqueue.m_pointLights[i];
+			PointLight& out = gpuLights[i];
+
+			out.m_posRadius = Vec4(in.m_worldPosition.xyz(), 1.0f / (in.m_radius * in.m_radius)); // xyz: world position, w: 1 / radius^2
+			out.m_diffuseColorTileSize = in.m_diffuseColor.xyz0(); // w is overwritten just below
+
+			if(in.m_shadowRenderQueues[0] == nullptr || !ctx.m_in->m_shadowsEnabled)
+			{
 				// No shadow for this light. NOTE(review): m_atlasTiles is left unwritten in this branch;
 				// presumably consumers check w against INVALID_TEXTURE_INDEX before reading it. Confirm.
+				out.m_diffuseColorTileSize.w() = INVALID_TEXTURE_INDEX;
+			}
+			else
+			{
+				out.m_diffuseColorTileSize.w() = in.m_atlasTileSize;
+				out.m_atlasTiles = UVec2(in.m_atlasTiles.x(), in.m_atlasTiles.y());
+			}
+
+			out.m_radiusPad1 = Vec2(in.m_radius); // Both components receive the radius
+		}
 	}
 	else
 	{
 		ctx.m_out->m_pointLightsToken.markUnused();
 	}
+
+	// Write the spot lights
+	const U visibleSpotLightCount = rqueue.m_spotLights.getSize();
+	if(visibleSpotLightCount)
+	{
+		SpotLight* data = static_cast<SpotLight*>(ctx.m_in->m_stagingMem->allocateFrame(
+			sizeof(SpotLight) * visibleSpotLightCount, StagingGpuMemoryType::UNIFORM, ctx.m_out->m_spotLightsToken));
+
+		WeakArray<SpotLight> gpuLights(data, visibleSpotLightCount);
+
+		for(U i = 0; i < visibleSpotLightCount; ++i)
+		{
+			const SpotLightQueueElement& in = rqueue.m_spotLights[i];
+			SpotLight& out = gpuLights[i];
+
+			F32 shadowmapIndex = INVALID_TEXTURE_INDEX;
+
 			// NOTE(review): m_texProjectionMat is only written for shadowed lights and is left unwritten
 			// otherwise; presumably it is only read when the shadowmap ID is valid. Confirm.
+			if(in.hasShadow() && ctx.m_in->m_shadowsEnabled)
+			{
+				// bias * proj_l * view_l
+				out.m_texProjectionMat = in.m_textureMatrix;
+
+				shadowmapIndex = 1.0f; // Just set a value
+			}
+
+			// Pos & dist
+			out.m_posRadius =
+				Vec4(in.m_worldTransform.getTranslationPart().xyz(), 1.0f / (in.m_distance * in.m_distance));
+
+			// Diff color and shadowmap ID now
+			out.m_diffuseColorShadowmapId = Vec4(in.m_diffuseColor, shadowmapIndex);
+
+			// Light dir & radius
+			Vec3 lightDir = -in.m_worldTransform.getRotationPart().getZAxis(); // Negated Z axis of the light's rotation
+			out.m_lightDirRadius = Vec4(lightDir, in.m_distance);
+
+			// Angles
+			out.m_outerCosInnerCos = Vec4(cos(in.m_outerAngle / 2.0f), cos(in.m_innerAngle / 2.0f), 1.0f, 1.0f); // Cosines of the half angles; z and w are set to 1
+		}
+	}
+	else
+	{
+		ctx.m_out->m_spotLightsToken.markUnused();
+	}
+
+	// Write the decals
+	const U visibleDecalCount = rqueue.m_decals.getSize();
+	if(visibleDecalCount)
+	{
+		Decal* data = static_cast<Decal*>(ctx.m_in->m_stagingMem->allocateFrame(
+			sizeof(Decal) * visibleDecalCount, StagingGpuMemoryType::UNIFORM, ctx.m_out->m_decalsToken));
+
+		WeakArray<Decal> gpuDecals(data, visibleDecalCount);
+		TextureView* diffuseAtlas = nullptr; // Atlas of the previously visited decal
+		TextureView* specularRoughnessAtlas = nullptr; // Atlas of the previously visited decal
+
+		for(U i = 0; i < visibleDecalCount; ++i)
+		{
+			const DecalQueueElement& in = rqueue.m_decals[i];
+			Decal& out = gpuDecals[i];
+
 			// Compare against the previous decal's atlases; the check is skipped while the remembered atlas is null.
 			// NOTE(review): the remembered atlases are overwritten on every iteration, so a decal with a null
 			// atlas resets the comparison and the assert after the loop only sees the last decal. Confirm intent.
+			if((diffuseAtlas != nullptr && diffuseAtlas != in.m_diffuseAtlas)
+				|| (specularRoughnessAtlas != nullptr && specularRoughnessAtlas != in.m_specularRoughnessAtlas))
+			{
+				ANKI_R_LOGF("All decals should have the same tex atlas");
+			}
+
+			diffuseAtlas = in.m_diffuseAtlas;
+			specularRoughnessAtlas = in.m_specularRoughnessAtlas;
+
+			// Diff
+			Vec4 uv = in.m_diffuseAtlasUv;
+			out.m_diffUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y()); // (x, y, z - x, w - y); presumably min corner + extent, confirm
+			out.m_blendFactors[0] = in.m_diffuseAtlasBlendFactor;
+
+			// Other
+			uv = in.m_specularRoughnessAtlasUv;
+			out.m_normRoughnessUv = Vec4(uv.x(), uv.y(), uv.z() - uv.x(), uv.w() - uv.y()); // Same packing as m_diffUv
+			out.m_blendFactors[1] = in.m_specularRoughnessAtlasBlendFactor;
+
+			// bias * proj_l * view
+			out.m_texProjectionMat = in.m_textureMatrix;
+		}
+
+		ANKI_ASSERT(diffuseAtlas && specularRoughnessAtlas);
+		ctx.m_out->m_diffDecalTexView.reset(diffuseAtlas);
+		ctx.m_out->m_specularRoughnessDecalTexView.reset(specularRoughnessAtlas);
+	}
+	else
+	{
+		ctx.m_out->m_decalsToken.markUnused();
+	}
+
+	// Write the probes
+	const U visibleProbeCount = rqueue.m_reflectionProbes.getSize();
+	if(visibleProbeCount)
+	{
+		ReflectionProbe* data = static_cast<ReflectionProbe*>(ctx.m_in->m_stagingMem->allocateFrame(
+			sizeof(ReflectionProbe) * visibleProbeCount, StagingGpuMemoryType::UNIFORM, ctx.m_out->m_probesToken));
+
+		WeakArray<ReflectionProbe> gpuProbes(data, visibleProbeCount);
+
+		for(U i = 0; i < visibleProbeCount; ++i)
+		{
+			const ReflectionProbeQueueElement& in = rqueue.m_reflectionProbes[i];
+			ReflectionProbe& out = gpuProbes[i];
+
+			out.m_positionCubemapIndex = Vec4(in.m_worldPosition, in.m_textureArrayIndex); // xyz: world position, w: texture array index
+			out.m_aabbMinPad1 = in.m_aabbMin.xyz0();
+			out.m_aabbMaxPad1 = in.m_aabbMax.xyz0();
+		}
+	}
+	else
+	{
+		ctx.m_out->m_probesToken.markUnused();
+	}
 }
 
 } // end namespace anki

+ 7 - 12
src/anki/renderer/ClusterBin.h

@@ -13,6 +13,7 @@ namespace anki
 
 // Forward
 class ThreadHiveSemaphore;
+class Config;
 
 /// @addtogroup renderer
 /// @{
@@ -25,12 +26,9 @@ public:
 
 	const RenderQueue* m_renderQueue ANKI_DBG_NULLIFY;
 
-	Bool m_shadowsEnabled ANKI_DBG_NULLIFY;
-
-	U32 m_maxLightIndices ANKI_DBG_NULLIFY;
+	StagingGpuMemoryManager* m_stagingMem ANKI_DBG_NULLIFY;
 
-	StackAllocator<U8> m_frameAlloc;
-	StagingGpuMemoryManager* m_stagingMem;
+	Bool m_shadowsEnabled ANKI_DBG_NULLIFY;
 };
 
 /// @memberof ClusterBin
@@ -54,30 +52,27 @@ public:
 class ClusterBin
 {
 public:
-	ClusterBin(const GenericMemoryPoolAllocator<U8>& alloc, U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ);
-
-	~ClusterBin();
 	/// Set the cluster grid dimensions and size the index budget from the "r.avgObjectsPerCluster" option.
+	void init(U32 clusterCountX, U32 clusterCountY, U32 clusterCountZ, const Config& cfg);
 
 	/// Bin the visible objects of the input's render queue into clusters and fill the output tokens.
 	void bin(ClusterBinIn& in, ClusterBinOut& out);
 
 private:
 	class BinCtx;
 
-	GenericMemoryPoolAllocator<U8> m_alloc;
-
 	Array<U32, 3> m_clusterCounts = {}; ///< Cluster count along X, Y and Z.
 	U32 m_totalClusterCount = 0; ///< Product of the three cluster counts.
+	U32 m_indexCount = 0; ///< Number of U32 indices allocated per frame by bin().
 
 	void prepare(BinCtx& ctx);
 
 	/// Claim and bin one cluster. Returns false once every cluster has been claimed.
-	void processNextCluster(BinCtx& ctx) const;
+	Bool processNextCluster(BinCtx& ctx) const;
 
 	/// Write the visible lights, decals and probes to per-frame staging buffers.
 	void writeTypedObjectsToGpuBuffers(BinCtx& ctx) const;
 
 	/// ThreadHive task that calls writeTypedObjectsToGpuBuffers(). userData is a BinCtx.
 	static void writeTypedObjectsToGpuBuffersCallback(
 		void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore);
 
 	/// ThreadHive task that calls processNextCluster() until it returns false. userData is a BinCtx.
-	static void processNextClusterCallback(
+	static void binToClustersCallback(
 		void* userData, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore);
 };
 /// @}

+ 1 - 1
src/anki/renderer/LightShading.cpp

@@ -47,7 +47,7 @@ Error LightShading::init(const ConfigSet& config)
 
 Error LightShading::initInternal(const ConfigSet& config)
 {
-	m_maxLightIds = config.getNumber("r.maxLightsPerCluster");
+	m_maxLightIds = config.getNumber("r.avgObjectsPerCluster");
 
 	if(m_maxLightIds == 0)
 	{

+ 2 - 2
src/anki/renderer/RenderQueue.h

@@ -152,9 +152,9 @@ public:
 	const void* m_userData;
 	RenderQueueDrawCallback m_drawCallback;
 	/// Totally unsafe but we can't have a smart ptr in here since there will be no deletion.
-	const TextureView* m_diffuseAtlas;
+	TextureView* m_diffuseAtlas;
 	/// Totally unsafe but we can't have a smart ptr in here since there will be no deletion.
-	const TextureView* m_specularRoughnessAtlas;
+	TextureView* m_specularRoughnessAtlas;
 	Vec4 m_diffuseAtlasUv;
 	Vec4 m_specularRoughnessAtlasUv;
 	F32 m_diffuseAtlasBlendFactor;

+ 2 - 2
src/anki/resource/TextureAtlasResource.h

@@ -42,12 +42,12 @@ public:
 	/// Load a texture atlas.
 	ANKI_USE_RESULT Error load(const ResourceFilename& filename, Bool async);
 
-	const TexturePtr& getGrTexture() const
+	TexturePtr getGrTexture() const
 	{
 		// Forwards to the wrapped texture resource.
 		// NOTE(review): the pointer is now returned by value instead of by const reference; presumably
 		// TexturePtr is reference counted, in which case every call copies the handle. Confirm this is intended.
 		return m_tex->getGrTexture();
 	}
 
-	const TextureViewPtr& getGrTextureView() const
+	TextureViewPtr getGrTextureView() const
 	{
 		// Forwards to the wrapped texture resource.
 		// NOTE(review): the pointer is now returned by value instead of by const reference, so callers that
 		// take .get() on the result hold a raw pointer that outlives the returned temporary. Presumably
 		// m_tex keeps the view alive; confirm.
 		return m_tex->getGrTextureView();
 	}

+ 3 - 3
src/anki/scene/Visibility.cpp

@@ -239,9 +239,9 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 	RenderQueueView& result = m_frcCtx->m_queueViews[taskId];
 	for(U i = 0; i < m_spatialToTestCount; ++i)
 	{
-		const SpatialComponent* spatialC = m_spatialsToTest[i];
+		SpatialComponent* spatialC = m_spatialsToTest[i];
 		ANKI_ASSERT(spatialC);
-		const SceneNode& node = spatialC->getSceneNode();
+		SceneNode& node = spatialC->getSceneNode();
 
 		// Skip if it is the same
 		if(ANKI_UNLIKELY(&testedNode == &node))
@@ -287,7 +287,7 @@ void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 			wantNode = true;
 		}
 
-		const DecalComponent* decalc = node.tryGetComponent<DecalComponent>();
+		DecalComponent* decalc = node.tryGetComponent<DecalComponent>();
 		if(decalc && wantsDecals)
 		{
 			wantNode = true;

+ 1 - 1
src/anki/scene/components/DecalComponent.h

@@ -118,7 +118,7 @@ public:
 		return m_sizes;
 	}
 
-	void setupDecalQueueElement(DecalQueueElement& el) const
+	void setupDecalQueueElement(DecalQueueElement& el)
 	{
 		el.m_diffuseAtlas = (m_layers[LayerType::DIFFUSE].m_atlas)
 								? m_layers[LayerType::DIFFUSE].m_atlas->getGrTextureView().get()