Bladeren bron

Clusterer v2

Panagiotis Christopoulos Charitos 10 jaren geleden
bovenliggende
commit
4b65ac83cf

+ 0 - 12
include/anki/renderer/Renderer.h

@@ -53,16 +53,6 @@ public:
 		return *m_is;
 	}
 
-	const Tiler& getTiler() const
-	{
-		return *m_tiler;
-	}
-
-	Tiler& getTiler()
-	{
-		return *m_tiler;
-	}
-
 	const Pps& getPps() const
 	{
 		return *m_pps;
@@ -122,7 +112,6 @@ public:
 
 anki_internal:
 	static const U TILE_SIZE = 64;
-	static const U CLUSTER_SPLIT_COUNT = 12;
 
 	void getOutputFramebuffer(FramebufferPtr& outputFb, U32& width, U32& height)
 	{
@@ -299,7 +288,6 @@ private:
 	UniquePtr<Pps> m_pps; ///< Postprocessing rendering stage
 	UniquePtr<Fs> m_fs; ///< Forward shading.
 	UniquePtr<Lf> m_lf; ///< Forward shading lens flares.
-	UniquePtr<Tiler> m_tiler;
 	UniquePtr<Dbg> m_dbg; ///< Debug stage.
 	/// @}
 

+ 50 - 59
include/anki/scene/Clusterer.h

@@ -12,7 +12,7 @@
 
 namespace anki {
 
-class PerspectiveFrustum;
+class FrustumComponent;
 
 /// @addtogroup scene
 /// @{
@@ -45,104 +45,95 @@ private:
 	DArray<Array<U8, 3>> m_clusterIds;
 	U32 m_count = 0;
 	GenericMemoryPoolAllocator<U8> m_alloc;
+
+	/// Append one cluster ID to the results.
+	/// @param x The cluster's X index. Must fit in a U8.
+	/// @param y The cluster's Y index. Must fit in a U8.
+	/// @param z The cluster's Z index. Must fit in a U8.
+	void pushBack(U x, U y, U z)
+	{
+		ANKI_ASSERT(x <= 0xFF && y <= 0xFF && z <= 0xFF);
+		// The storage is sized by Clusterer::initTestResults() and is not
+		// grown here, so catch overflows instead of writing out of bounds
+		ANKI_ASSERT(m_count < m_clusterIds.getSize());
+		m_clusterIds[m_count++] = Array<U8, 3>{U8(x), U8(y), U8(z)};
+	}
 };
 
 /// Collection of clusters for visibility tests.
 class Clusterer
 {
+	friend class UpdatePlanesPerspectiveCameraTask;
+
 public:
 	Clusterer()
 	{}
 
-	~Clusterer()
-	{
-		m_clusters.destroy(m_alloc);
-		m_splitInfo.destroy(m_alloc);
-	}
+	~Clusterer();
 
 	void init(const GenericMemoryPoolAllocator<U8>& alloc, U clusterCountX,
-		U clusterCountY, U clusterCountZ)
-	{
-		m_alloc = alloc;
-		m_counts[0] = clusterCountX;
-		m_counts[1] = clusterCountY;
-		m_counts[2] = clusterCountZ;
-	}
+		U clusterCountY, U clusterCountZ);
 
-	U getClusterCount() const
-	{
-		return U(m_counts[0]) * U(m_counts[1]) * U(m_counts[2]);
-	}
-
-	void prepare(const PerspectiveFrustum& fr);
+	/// Prepare for visibility tests.
+	void prepare(ThreadPool& threadpool, const SceneNode& node);
 
 	void initTestResults(const GenericMemoryPoolAllocator<U8>& alloc,
 		ClustererTestResult& rez) const;
 
 	/// Bin collision shape.
-	/// @param[in] cs The collision shape should be in view space.
-	void bin(const CollisionShape& cs, ClustererTestResult& rez) const;
+	void bin(const CollisionShape& cs, const Aabb& csBox,
+		ClustererTestResult& rez) const;
 
 	void fillShaderParams(Vec4& params) const
 	{
 		params = Vec4(m_near, m_calcNearOpt, 0.0, 0.0);
 	}
 
+	/// @return The total number of clusters, that is the product of the
+	///         counts in X, Y and Z.
+	U getClusterCount() const
+	{
+		const U countX = m_counts[0];
+		const U countY = m_counts[1];
+		const U countZ = m_counts[2];
+		return countX * countY * countZ;
+	}
+
 public:
 	GenericMemoryPoolAllocator<U8> m_alloc;
 
 	Array<U8, 3> m_counts;
 
-	class Cluster
-	{
-	public:
-		// Intead of Aabb use minimum size variables
-		Vec3 m_min;
-		Vec3 m_max;
-	};
+	/// Tile planes.
+	DArray<Plane> m_allPlanes; ///< Do one allocation.
+	SArray<Plane> m_planesY; ///< Local space.
+	SArray<Plane> m_planesX; ///< Local space.
+	SArray<Plane> m_planesYW;
+	SArray<Plane> m_planesXW;
+	Plane* m_nearPlane; ///< In world space
+	Plane* m_farPlane; ///< In world space
 
-	/// [z][y][x]
-	DArray<Cluster> m_clusters;
+	/// Used to check if the frustum is changed and we need to update the
+	/// planes.
+	const SceneNode* m_node = nullptr;
 
-	class SplitInfo
-	{
-	public:
-		Vec2 m_xy;
-		Vec2 m_sizes;
-	};
+	const FrustumComponent* m_frc = nullptr; ///< Cache it.
 
-	DArray<SplitInfo> m_splitInfo;
+	/// Timestamp for the same reason as m_frc.
+	Timestamp m_planesLSpaceTimestamp = 0;
 
 	F32 m_near = 0.0;
 	F32 m_far = 0.0;
-	F32 m_fovY = 0.0;
-	F32 m_fovX = 0.0;
-
 	F32 m_calcNearOpt = 0.0;
-	Mat4 m_projMat;
 
-	Cluster& cluster(U x, U y, U z)
-	{
-		ANKI_ASSERT(x < m_counts[0]);
-		ANKI_ASSERT(y < m_counts[1]);
-		ANKI_ASSERT(z < m_counts[2]);
-		return m_clusters[m_counts[0] * (z * m_counts[1] + y) + x];
-	}
+	F32 calcNear(U k) const;
 
-	const Cluster& cluster(U x, U y, U z) const
-	{
-		ANKI_ASSERT(x < m_counts[0]);
-		ANKI_ASSERT(y < m_counts[1]);
-		ANKI_ASSERT(z < m_counts[2]);
-		return m_clusters[m_counts[0] * (z * m_counts[1] + y) + x];
-	}
+	U calcZ(F32 zVspace) const;
 
-	F32 calcNear(U k) const;
-	U calcK(F32 zVspace) const;
+	void binGeneric(const CollisionShape& cs, U xBegin, U xEnd, U yBegin,
+		U yEnd, U zBegin, U zEnd, ClustererTestResult& rez) const;
+
+	/// Special fast path for binning spheres.
+	void binSphere(const Sphere& s, const Aabb& aabb,
+		ClustererTestResult& rez) const;
+
+	void computeSplitRange(const CollisionShape& cs, U& zBegin, U& zEnd) const;
+
+	void update(U32 threadId, PtrSize threadsCount, Bool frustumChanged);
 
-	void initClusters();
+	/// Calculate and set a top looking plane.
+	void calcPlaneY(U i, const Vec4& projParams);
 
-	void findSplitsFromAabb(const Aabb& box, U& zFrom, U& zTo) const;
+	/// Calculate and set a right looking plane.
+	void calcPlaneX(U j, const Vec4& projParams);
 };
 /// @}
 

+ 5 - 4
shaders/IsLp.frag.glsl

@@ -17,6 +17,7 @@
 #endif
 
 const uint CLUSTER_COUNT_Z = CLUSTER_COUNT / (TILES_X_COUNT * TILES_Y_COUNT);
+const uint TILE_COUNT = TILES_X_COUNT * TILES_Y_COUNT;
 
 // The base of all lights
 struct Light
@@ -48,12 +49,12 @@ layout(std140, binding = 1) readonly buffer _s1
 	SpotLight u_spotLights[MAX_SPOT_LIGHTS];
 };
 
-layout(std430, binding = 3) readonly buffer _s3
+layout(std430, binding = 2) readonly buffer _s3
 {
 	uint u_clusters[CLUSTER_COUNT];
 };
 
-layout(std430, binding = 4) readonly buffer _s5
+layout(std430, binding = 3) readonly buffer _s5
 {
 	uint u_lightIndices[MAX_LIGHT_INDICES];
 };
@@ -310,7 +311,7 @@ void main()
 
 	// Get counts and offsets
 	uint k = calcK(fragPos.z);
-	uint cluster = u_clusters[in_instanceId + k * CLUSTER_COUNT_Z];
+	uint cluster = u_clusters[in_instanceId + k * TILE_COUNT];
 	uint lightOffset = cluster >> 16u;
 	uint pointLightsCount = (cluster >> 8u) & 0xFFu;
 	uint spotLightsCount = cluster & 0xFFu;
@@ -369,7 +370,7 @@ void main()
 #if 0
 	if(pointLightsCount != 0)
 	{
-		out_color = vec3(float(pointLightsCount) * 0.05);
+		out_color = vec3(1.0);
 	}
 
 	/*uint x = in_instanceId % 60;

+ 1 - 0
src/core/Config.cpp

@@ -72,6 +72,7 @@ Config::Config()
 	newOption("samples", 1);
 	newOption("tessellation", true);
 	newOption("sceneFrameAllocatorSize", 1024 * 1024);
+	newOption("clusterSizeZ", 32);
 
 	newOption("offscreen", false);
 

+ 5 - 5
src/renderer/Dbg.cpp

@@ -160,14 +160,14 @@ Error Dbg::run(CommandBufferPtr& cmdb)
 		PerspectiveFrustum fr;
 		const F32 ang = 55.0;
 		F32 far = 200.0;
-		fr.setAll(toRad(ang) * m_r->getAspectRatio(), toRad(ang), 1.0, far);
+		fr.setAll(toRad(ang) * m_r->getAspectRatio(), toRad(ang), 0.2, far);
 		fr.resetTransform(Transform(origin, Mat3x4::getIdentity(), 1.0));
 
-		Clusterer c(getAllocator());
+		Clusterer c;
 
-		c.init(m_r->getWidth() / 64, m_r->getHeight() / 64, 20);
+		c.init(getAllocator(), m_r->getWidth() / 64, m_r->getHeight() / 64, 30);
 		//c.init(5, 3, 10);
-		c.prepare(fr, SArray<Vec2>());
+		c.prepare(fr);
 
 		CollisionDebugDrawer cd(m_drawer);
 		m_drawer->setColor(Vec4(1.0, 0.0, 0.0, 1.0));
@@ -200,7 +200,7 @@ Error Dbg::run(CommandBufferPtr& cmdb)
 		sp.getSpatialCollisionShape().accept(cd);
 
 		ClustererTestResult rez;
-		c.initTempTestResults(getAllocator(), rez);
+		c.initTestResults(getAllocator(), rez);
 
 		c.bin(sp.getSpatialCollisionShape(), rez);
 

+ 11 - 69
src/renderer/Is.cpp

@@ -116,60 +116,6 @@ public:
 	}
 };
 
-/// Visitor that transforms a collision object.
-class ShapeTransformer final: public CollisionShape::ConstVisitor
-{
-public:
-	Sphere m_sphere;
-	ConvexHullShape m_hull;
-	Array<Vec4, 5> m_hullPoints;
-	CollisionShape* m_outShape = nullptr;
-	Transform* m_trf = nullptr;
-
-	void visit(const LineSegment&)
-	{
-		ANKI_ASSERT(0);
-	}
-
-	void visit(const Obb&)
-	{
-		ANKI_ASSERT(0);
-	}
-
-	void visit(const Plane&)
-	{
-		ANKI_ASSERT(0);
-	}
-
-	void visit(const Sphere& s)
-	{
-		m_sphere = s;
-		m_sphere.transform(*m_trf);
-		m_outShape = &m_sphere;
-	}
-
-	void visit(const Aabb&)
-	{
-		ANKI_ASSERT(0);
-	}
-
-	void visit(const CompoundShape&)
-	{
-	}
-
-	void visit(const ConvexHullShape& hull)
-	{
-		ANKI_ASSERT(hull.getPointsCount() == m_hullPoints.getSize());
-		memcpy(&m_hullPoints[0], hull.getPoints(), sizeof(m_hullPoints));
-		for(Vec4& p : m_hullPoints)
-		{
-			p = m_trf->transform(p);
-		}
-		m_hull.initStorage(&m_hullPoints[0], m_hullPoints.getSize());
-		m_outShape = &m_hull;
-	}
-};
-
 //==============================================================================
 // Is                                                                          =
 //==============================================================================
@@ -339,8 +285,8 @@ Error Is::initInternal(const ConfigSet& config)
 
 		init.m_storageBuffers[0].m_buffer = m_pLightsBuffs[i];
 		init.m_storageBuffers[1].m_buffer = m_sLightsBuffs[i];
-		init.m_storageBuffers[3].m_buffer = m_clusterBuffers[i];
-		init.m_storageBuffers[4].m_buffer = m_lightIdsBuffers[i];
+		init.m_storageBuffers[2].m_buffer = m_clusterBuffers[i];
+		init.m_storageBuffers[3].m_buffer = m_lightIdsBuffers[i];
 
 		m_rcGroups[i] = getGrManager().newInstance<ResourceGroup>(init);
 	}
@@ -580,11 +526,16 @@ void Is::binLights(U32 threadId, PtrSize threadsCount, TaskCommonData& task)
 		const U countS = cluster.m_spotCount.load();
 		const U count = countP + countS;
 
-		const U offset = task.m_lightIdsCount.fetchAdd(count);
-
 		auto& c = task.m_clusters[i];
 		c.m_combo = 0;
 
+		if(ANKI_UNLIKELY(count == 0))
+		{
+			continue;
+		}
+
+		const U offset = task.m_lightIdsCount.fetchAdd(count);
+
 		if(offset + count <= m_maxLightIds)
 		{
 			ANKI_ASSERT(offset <= 0xFFFF);
@@ -608,8 +559,6 @@ void Is::binLights(U32 threadId, PtrSize threadsCount, TaskCommonData& task)
 		}
 		else
 		{
-			memset(&c, 0, sizeof(c));
-
 			ANKI_LOGW("Light IDs buffer too small");
 		}
 	}
@@ -720,15 +669,8 @@ void Is::binLight(
 	TaskCommonData& task,
 	ClustererTestResult& testResult)
 {
-	// Transform the spatial collision shape to view space for the cluster tests
-	FrustumComponent& frc =
-		m_r->getActiveCamera().getComponent<FrustumComponent>();
-	Transform viewTrf(frc.getViewMatrix());
-	ShapeTransformer transformer;
-	transformer.m_trf = &viewTrf;
-	sp.getSpatialCollisionShape().accept(transformer);
-
-	m_r->getClusterer().bin(*transformer.m_outShape, testResult);
+	m_r->getClusterer().bin(sp.getSpatialCollisionShape(), sp.getAabb(),
+		testResult);
 
 	// Bin to the correct tiles
 	auto it = testResult.getClustersBegin();

+ 3 - 11
src/renderer/Renderer.cpp

@@ -15,7 +15,6 @@
 #include "anki/renderer/Fs.h"
 #include "anki/renderer/Lf.h"
 #include "anki/renderer/Dbg.h"
-#include "anki/renderer/Tiler.h"
 
 namespace anki {
 
@@ -68,7 +67,7 @@ Error Renderer::initInternal(const ConfigSet& config)
 	m_tileCount = m_tileCountXY.x() * m_tileCountXY.y();
 
 	m_clusterer.init(getAllocator(), m_tileCountXY.x(), m_tileCountXY.y(),
-		CLUSTER_SPLIT_COUNT);
+		config.getNumber("clusterSizeZ"));
 
 	m_tessellation = config.getNumber("tessellation");
 
@@ -106,9 +105,6 @@ Error Renderer::initInternal(const ConfigSet& config)
 	m_ms.reset(m_alloc.newInstance<Ms>(this));
 	ANKI_CHECK(m_ms->init(config));
 
-	m_tiler.reset(m_alloc.newInstance<Tiler>(this));
-	ANKI_CHECK(m_tiler->init());
-
 	m_is.reset(m_alloc.newInstance<Is>(this));
 	ANKI_CHECK(m_is->init(config));
 
@@ -145,8 +141,7 @@ Error Renderer::render(SceneNode& frustumableNode,
 	}
 
 	ANKI_ASSERT(frc.getFrustum().getType() == Frustum::Type::PERSPECTIVE);
-	m_clusterer.prepare(
-		static_cast<const PerspectiveFrustum&>(frc.getFrustum()));
+	m_clusterer.prepare(getThreadPool(), frustumableNode);
 
 	ANKI_COUNTER_START_TIMER(RENDERER_MS_TIME);
 	ANKI_CHECK(m_ms->run(cmdb[0]));
@@ -156,7 +151,6 @@ Error Renderer::render(SceneNode& frustumableNode,
 
 	m_ms->generateMipmaps(cmdb[0]);
 
-	m_tiler->runMinMax(cmdb[0]);
 	cmdb[0]->flush();
 
 	ANKI_COUNTER_START_TIMER(RENDERER_IS_TIME);
@@ -259,8 +253,6 @@ void Renderer::createDrawQuadPipeline(
 
 //==============================================================================
 void Renderer::prepareForVisibilityTests(Camera& cam)
-{
-	m_tiler->updateTiles(cam);
-}
+{}
 
 } // end namespace anki

+ 468 - 130
src/scene/Clusterer.cpp

@@ -5,15 +5,59 @@
 
 #include "anki/scene/Clusterer.h"
 #include "anki/scene/FrustumComponent.h"
+#include "anki/scene/MoveComponent.h"
+#include "anki/scene/SceneNode.h"
 #include "anki/util/Rtti.h"
 
 namespace anki {
 
+//==============================================================================
+// Misc                                                                        =
+//==============================================================================
+
+//==============================================================================
+/// Thread pool task that forwards the work of one thread to
+/// Clusterer::update().
+class UpdatePlanesPerspectiveCameraTask: public ThreadPool::Task
+{
+public:
+	Clusterer* m_clusterer = nullptr; ///< Must be set before the task runs.
+	Bool m_frustumChanged = false; ///< Passed as-is to Clusterer::update().
+
+	/// Run the part of the update that belongs to threadId.
+	/// @return Always ErrorCode::NONE.
+	Error operator()(U32 threadId, PtrSize threadsCount)
+	{
+		ANKI_ASSERT(m_clusterer != nullptr);
+		m_clusterer->update(threadId, threadsCount, m_frustumChanged);
+		return ErrorCode::NONE;
+	}
+};
+
+//==============================================================================
+/// Build a view space vector out of a depth value and a 2D NDC position.
+/// @param depth The depth that is fed to the projection parameters.
+/// @param ndc The X and Y of the point in NDC.
+/// @param projParams The projection parameters. Z and W are used to find the
+///        view space Z, X and Y to scale the NDC position.
+/// @return The view space vector. Its W is zero.
+static Vec4 unproject(const F32 depth, const Vec2& ndc, const Vec4& projParams)
+{
+	const F32 viewZ = projParams.z() / (projParams.w() + depth);
+	const Vec2 viewXy = ndc * projParams.xy() * viewZ;
+
+	return Vec4(viewXy.x(), viewXy.y(), viewZ, 0.0);
+}
+
+//==============================================================================
+// Clusterer                                                                   =
+//==============================================================================
+
+//==============================================================================
+Clusterer::~Clusterer()
+{
+	// init() places all the SArray members and the near/far plane pointers
+	// inside m_allPlanes, so this is the only storage that has to be released
+	m_allPlanes.destroy(m_alloc);
+}
+
 //==============================================================================
 void Clusterer::initTestResults(const GenericMemoryPoolAllocator<U8>& alloc,
 	ClustererTestResult& rez) const
 {
-	rez.m_clusterIds.create(alloc, m_clusters.getSize());
+	rez.m_clusterIds.create(alloc, getClusterCount());
 	rez.m_count = 0;
 	rez.m_alloc = alloc;
 }
@@ -26,7 +70,7 @@ F32 Clusterer::calcNear(U k) const
 }
 
 //==============================================================================
-U Clusterer::calcK(F32 zVspace) const
+U Clusterer::calcZ(F32 zVspace) const
 {
 	zVspace = clamp(zVspace, -m_far, -m_near);
 	zVspace = -zVspace;
@@ -38,96 +82,294 @@ U Clusterer::calcK(F32 zVspace) const
 }
 
 //==============================================================================
-void Clusterer::prepare(const PerspectiveFrustum& fr)
+void Clusterer::calcPlaneY(U i, const Vec4& projParams)
+{
+	// NDC Y of the boundary that follows tile row i
+	F32 ndcY = F32(i + 1) / m_counts[1] * 2.0 - 1.0;
+
+	// Two vectors that lie on the plane, one at each horizontal end of NDC
+	Vec4 atMinX = unproject(1.0, Vec2(-1.0, ndcY), projParams);
+	Vec4 atMaxX = unproject(1.0, Vec2(1.0, ndcY), projParams);
+
+	// The order of the operands decides which way the normal looks
+	Vec4 normal = atMaxX.cross(atMinX);
+	normal.normalize();
+
+	// Local space plane with a zero offset
+	m_planesY[i] = Plane(normal, 0.0);
+}
+
+//==============================================================================
+void Clusterer::calcPlaneX(U j, const Vec4& projParams)
+{
+	// NDC X of the boundary that follows tile column j
+	F32 ndcX = F32(j + 1) / m_counts[0] * 2.0 - 1.0;
+
+	// Two vectors that lie on the plane, one at each vertical end of NDC
+	Vec4 atMinY = unproject(1.0, Vec2(ndcX, -1.0), projParams);
+	Vec4 atMaxY = unproject(1.0, Vec2(ndcX, 1.0), projParams);
+
+	// The order of the operands decides which way the normal looks
+	Vec4 normal = atMinY.cross(atMaxY);
+	normal.normalize();
+
+	// Local space plane with a zero offset
+	m_planesX[j] = Plane(normal, 0.0);
+}
+
+//==============================================================================
+void Clusterer::init(const GenericMemoryPoolAllocator<U8>& alloc,
+	U clusterCountX, U clusterCountY, U clusterCountZ)
+{
+	// The counts are stored as U8 and (count - 1) planes are created per
+	// direction, so zero or too big counts would silently break the setup
+	ANKI_ASSERT(clusterCountX > 0 && clusterCountX <= 0xFF);
+	ANKI_ASSERT(clusterCountY > 0 && clusterCountY <= 0xFF);
+	ANKI_ASSERT(clusterCountZ > 0 && clusterCountZ <= 0xFF);
+
+	m_alloc = alloc;
+	m_counts[0] = clusterCountX;
+	m_counts[1] = clusterCountY;
+	m_counts[2] = clusterCountZ;
+
+	// Init planes. One plane for each direction, plus near/far plus the world
+	// space of those
+	U planesCount =
+		(m_counts[0] - 1) * 2 // planes J
+		+ (m_counts[1] - 1) * 2 // planes I
+		+ 2; // Near and far planes
+
+	// Do a single allocation and let everything else point inside it
+	m_allPlanes.create(m_alloc, planesCount);
+
+	Plane* base = &m_allPlanes[0];
+	U count = 0;
+
+	m_planesX = SArray<Plane>(base + count, m_counts[0] - 1);
+	count += m_planesX.getSize();
+
+	m_planesY = SArray<Plane>(base + count, m_counts[1] - 1);
+	count += m_planesY.getSize();
+
+	m_planesXW = SArray<Plane>(base + count, m_counts[0] - 1);
+	count += m_planesXW.getSize();
+
+	m_planesYW = SArray<Plane>(base + count, m_counts[1] - 1);
+	count += m_planesYW.getSize();
+
+	m_nearPlane = base + count;
+	++count;
+
+	m_farPlane = base + count;
+	++count;
+
+	// Everything that was allocated should have been handed out
+	ANKI_ASSERT(count == m_allPlanes.getSize());
+}
+
+//==============================================================================
+void Clusterer::prepare(ThreadPool& threadPool, const SceneNode& node)
 {
 	// Get some things
-	F32 near = fr.getNear();
-	F32 far = fr.getFar();
-	F32 fovY = fr.getFovY();
-	F32 fovX = fr.getFovX();
+	const FrustumComponent& frc = node.getComponent<FrustumComponent>();
+	Timestamp frcTimestamp = frc.getTimestamp();
+	const Frustum& fr = frc.getFrustum();
+	ANKI_ASSERT(fr.getType() == Frustum::Type::PERSPECTIVE);
+	const PerspectiveFrustum& pfr = static_cast<const PerspectiveFrustum&>(fr);
+
+	// Do a job that transforms only the planes when:
+	// - it's the same frustum component as before and
+	// - the component has not changed
+	// This needs to be decided before m_node is overwritten below, otherwise
+	// the node comparison can never detect a different node.
+	// NOTE(review): ">=" also treats an equal timestamp as a change and the
+	// stored timestamp is a copy of frcTimestamp, so this looks like it is
+	// true on every call. Confirm if ">" was intended.
+	Bool frustumChanged =
+		frcTimestamp >= m_planesLSpaceTimestamp || m_node != &node;
+
+	// Set some things
+	m_node = &node;
+	m_frc = &frc;
+	m_near = pfr.getNear();
+	m_far = pfr.getFar();
+	m_calcNearOpt = (pfr.getFar() - m_near) / pow(m_counts[2], 2.0);
+
+	//
+	// Issue parallel jobs
+	//
+	Array<UpdatePlanesPerspectiveCameraTask, ThreadPool::MAX_THREADS> jobs;
+
+	for(U i = 0; i < threadPool.getThreadsCount(); i++)
+	{
+		jobs[i].m_clusterer = this;
+		jobs[i].m_frustumChanged = frustumChanged;
+		threadPool.assignNewTask(i, &jobs[i]);
+	}
 
-	if(near == m_near && far == m_far && m_fovY == fovY && m_fovX == fovX)
+	// Update timestamp
+	if(frustumChanged)
 	{
-		// Not dirty, early exit
-		return;
+		m_planesLSpaceTimestamp = frcTimestamp;
 	}
 
-	m_fovY = fovY;
-	m_fovX = fovX;
-	m_near = near;
-	m_far = far;
+	// Sync threads
+	Error err = threadPool.waitForAllThreadsToFinish();
+	(void)err;
+}
+
+//==============================================================================
+void Clusterer::computeSplitRange(const CollisionShape& cs, U& zBegin,
+	U& zEnd) const
+{
+	// First split: comes from the test of cs against the near plane.
+	// Negative results are clamped to zero
+	const F32 nearTest = cs.testPlane(*m_nearPlane);
+	const F32 nearDist = max(0.0f, nearTest);
+
+	zBegin = calcZ(-nearDist);
+	ANKI_ASSERT(zBegin <= m_counts[2]);
+
+	// One past the last split: comes from the test of cs against the far
+	// plane, clamped the same way and then measured back from m_far
+	const F32 farTest = cs.testPlane(*m_farPlane);
+	const F32 farDist = m_far - max(0.0f, farTest);
+
+	zEnd = calcZ(-farDist) + 1;
+	ANKI_ASSERT(zEnd <= m_counts[2]);
+}
 
-	m_calcNearOpt = (m_far - m_near) / pow(m_counts[2], 2.0);
+//==============================================================================
+void Clusterer::bin(const CollisionShape& cs, const Aabb& csBox,
+	ClustererTestResult& rez) const
+{
+	rez.m_count = 0;
 
-	// Alloc and init clusters
-	U clusterCount = m_counts[0] * m_counts[1] * m_counts[2];
-	if(clusterCount != m_clusters.getSize())
+	if(isa<Sphere>(cs))
 	{
-		m_clusters.resize(m_alloc, clusterCount);
+		binSphere(dcast<const Sphere&>(cs), csBox, rez);
 	}
-
-	if(m_counts[2] != m_splitInfo.getSize())
+	else
 	{
-		m_splitInfo.resize(m_alloc, m_counts[2]);
+		U zBegin, zEnd;
+		computeSplitRange(cs, zBegin, zEnd);
+		binGeneric(cs, 0, m_counts[0], 0, m_counts[1], zBegin, zEnd, rez);
 	}
-
-	initClusters();
 }
 
 //==============================================================================
-void Clusterer::initClusters()
+void Clusterer::binSphere(const Sphere& s, const Aabb& aabb,
+	ClustererTestResult& rez) const
 {
-	PerspectiveFrustum::calculateProjectionMatrix(m_fovX, m_fovY, m_near, m_far,
-		m_projMat);
-	Mat4 invProj = m_projMat.getInverse();
+	const Mat4& vp = m_frc->getViewProjectionMatrix();
+	const Mat4& v = m_frc->getViewMatrix();
+
+	const Vec4& scent = s.getCenter();
+	const F32 srad = s.getRadius();
+
+	U zBegin, zEnd;
+	computeSplitRange(s, zBegin, zEnd);
 
-	// For every claster
-	for(U z = 0; z < m_counts[2]; ++z)
+	// Do a quick check
+	Vec4 eye = m_frc->getFrustumOrigin() - scent;
+	if(ANKI_UNLIKELY(eye.getLengthSquared() <= srad * srad))
 	{
-		for(U y = 0; y < m_counts[1]; ++y)
+		// Camera totally inside the sphere
+		for(U z = zBegin; z < zEnd; ++z)
 		{
-			for(U x = 0; x < m_counts[0]; ++x)
+			for(U y = 0; y < m_counts[1]; ++y)
 			{
-				// Compute projection matrix
-				F32 near = calcNear(z);
-				F32 far = calcNear(z + 1);
-				if(far > m_far)
+				for(U x = 0; x < m_counts[0]; ++x)
 				{
-					far = m_far;
+					rez.pushBack(x, y, z);
 				}
+			}
+		}
+		return;
+	}
+
+	// Compute projection points
+	const Vec4& minv = aabb.getMin();
+	const Vec4& maxv = aabb.getMax();
+	Array<Vec4, 8> points;
+	points[0] = minv.xyz1();
+	points[1] = Vec4(minv.x(), maxv.y(), minv.z(), 1.0);
+	points[2] = Vec4(minv.x(), maxv.y(), maxv.z(), 1.0);
+	points[3] = Vec4(minv.x(), minv.y(), maxv.z(), 1.0);
+	points[4] = maxv.xyz1();
+	points[5] = Vec4(maxv.x(), minv.y(), maxv.z(), 1.0);
+	points[6] = Vec4(maxv.x(), minv.y(), minv.z(), 1.0);
+	points[7] = Vec4(maxv.x(), maxv.y(), minv.z(), 1.0);
+	Vec2 min2(MAX_F32), max2(MIN_F32);
+	for(Vec4& p : points)
+	{
+		p = vp * p;
+		p /= abs(p.w());
+
+		for(U i = 0; i < 2; ++i)
+		{
+			min2[i] = min(min2[i], p[i]);
+			max2[i] = max(max2[i], p[i]);
+		}
+	}
+
+	min2 = min2 * 0.5 + 0.5;
+	max2 = max2 * 0.5 + 0.5;
 
-				// Project some points
-				Vec2 tileMin, tileMax;
-				tileMin.x() = F32(x) / m_counts[0];
-				tileMax.x() = F32(x + 1) / m_counts[0];
-				tileMin.y() = F32(y) / m_counts[1];
-				tileMax.y() = F32(y + 1) / m_counts[1];
+	// Do a box test
+	F32 tcountX = m_counts[0];
+	F32 tcountY = m_counts[1];
 
-				tileMin = tileMin * 2.0 - 1.0;
-				tileMax = tileMax * 2.0 - 1.0;
+	I xBegin = floor(tcountX * min2.x());
+	xBegin = clamp<I>(xBegin, 0, m_counts[0]);
 
-				Vec4 min = invProj * Vec4(tileMin, 0.0, 1.0);
-				min.w() = 0.0;
-				Plane p(Vec4(0.0, 0.0, -1.0, 0.0), far);
-				p.intersectVector(min, min);
+	// Clamp as a signed value. Going through an unsigned type would turn a
+	// negative xEnd into a huge number and the result would be the full count
+	I xEnd = ceil(tcountX * max2.x());
+	xEnd = clamp<I>(xEnd, 0, m_counts[0]);
 
-				Vec4 max = invProj * Vec4(tileMax, 0.0, 1.0);
-				max.w() = 0.0;
-				p.intersectVector(max, max);
-				max.z() = -near;
+	I yBegin = floor(tcountY * min2.y());
+	yBegin = clamp<I>(yBegin, 0, m_counts[1]);
 
-				ANKI_ASSERT(min.xyz() < max.xyz());
-				cluster(x, y, z).m_min = min.xyz();
-				cluster(x, y, z).m_max = max.xyz();
+	// Clamp from below as well, the same way yBegin is clamped, so that a
+	// negative value gives an empty range instead of a negative end
+	I yEnd = ceil(tcountY * max2.y());
+	yEnd = clamp<I>(yEnd, 0, m_counts[1]);
 
-				// Set split info
-				if(x == 0 && y == 0)
+	// Both ranges should be inside the tile grid of their own axis
+	ANKI_ASSERT(xBegin >= 0 && xBegin <= tcountX
+		&& xEnd >= 0 && xEnd <= tcountX);
+	ANKI_ASSERT(yBegin >= 0 && yBegin <= tcountY
+		&& yEnd >= 0 && yEnd <= tcountY);
+
+	Vec2 tileSize(1.0 / tcountX, 1.0 / tcountY);
+
+	Vec4 a = vp * s.getCenter().xyz1();
+	Vec2 c = a.xy() / a.w();
+	c = c * 0.5 + 0.5;
+
+	Vec4 sphereCenterVSpace = (v * scent.xyz1()).xyz0();
+
+	for(I y = yBegin; y < yEnd; ++y)
+	{
+		for(I x = xBegin; x < xEnd; ++x)
+		{
+			// Do detailed tests
+
+			Vec2 tileMin = Vec2(x, y) * tileSize;
+			Vec2 tileMax = Vec2(x + 1, y + 1) * tileSize;
+
+			// Find closest point of sphere center and tile
+			Vec2 cp(0.0);
+			for(U i = 0; i < 2; ++i)
+			{
+				if(c[i] > tileMax[i])
 				{
-					Vec2 xy(min.x(), min.y());
-					Vec2 sizes(max.x() - min.x(), max.y() - min.y());
+					cp[i] = tileMax[i];
+				}
+				else if (c[i] < tileMin[i])
+				{
+					cp[i] = tileMin[i];
+				}
+				else
+				{
+					// the c lies between min and max
+					cp[i] = c[i];
+				}
+			}
+
+			// Unproject the closest point to view space
+			Vec4 view = unproject(
+				1.0, cp * 2.0 - 1.0, m_frc->getProjectionParameters());
 
-					m_splitInfo[z].m_xy = xy;
-					m_splitInfo[z].m_sizes = sizes;
+			// Do a simple ray-sphere test
+			Vec4 dir = view;
+			Vec4 proj = sphereCenterVSpace.getProjection(dir);
+			F32 lenSq = (sphereCenterVSpace - proj).getLengthSquared();
+			Bool inside = lenSq <= (srad * srad);
+
+			if(inside)
+			{
+				for(U z = zBegin; z < zEnd; ++z)
+				{
+					rez.pushBack(x, y, z);
 				}
 			}
 		}
@@ -135,112 +377,208 @@ void Clusterer::initClusters()
 }
 
 //==============================================================================
-void Clusterer::bin(const CollisionShape& cs, ClustererTestResult& rez) const
+void Clusterer::binGeneric(const CollisionShape& cs, U xBegin, U xEnd, U yBegin,
+	U yEnd, U zBegin, U zEnd, ClustererTestResult& rez) const
 {
-	rez.m_count = 0;
-
-	Aabb box;
-	cs.computeAabb(box);
+	U my = (yEnd - yBegin) / 2;
+	U mx = (xEnd - xBegin) / 2;
 
-	U beginZ, endZ;
-
-	// Find splits that cover the box
-	findSplitsFromAabb(box, beginZ, endZ);
-
-	// For each split find the x and y clusters
-	for(U z = beginZ; z < endZ; ++z)
+	// Handle final
+	if(ANKI_UNLIKELY(my == 0 && mx == 0))
 	{
-		const SplitInfo& info = m_splitInfo[z];
-
-		I beginX = -1, endX = -1, beginY = -1, endY = -1;
-
-		// Find beginX
-		F32 x = box.getMin().x();
-		if(x <= info.m_xy.x())
+		for(U z = zBegin; z < zEnd; ++z)
 		{
-			beginX = 0;
+			rez.pushBack(xBegin, yBegin, z);
 		}
-		else if(x >= (info.m_xy.x() + info.m_sizes.x() * m_counts[0]))
+		return;
+	}
+
+	// Handle the edge case
+	if(ANKI_UNLIKELY(mx == 0 || my == 0))
+	{
+		if(mx == 0)
 		{
-			beginX = m_counts[0];
+			const Plane& topPlane = m_planesYW[yBegin + my - 1];
+			F32 test = cs.testPlane(topPlane);
+
+			if(test <= 0.0)
+			{
+				binGeneric(cs, xBegin, xEnd, yBegin, yBegin + my, zBegin, zEnd,
+					rez);
+			}
+
+			if(test >= 0.0)
+			{
+				binGeneric(cs, xBegin, xEnd, yBegin + my, yEnd, zBegin, zEnd,
+					rez);
+			}
 		}
 		else
 		{
-			beginX = (x - info.m_xy.x()) / info.m_sizes.x();
+			const Plane& rightPlane = m_planesXW[xBegin + mx - 1];
+			F32 test = cs.testPlane(rightPlane);
+
+			if(test <= 0.0)
+			{
+				binGeneric(cs, xBegin, xBegin + mx, yBegin, yEnd, zBegin, zEnd,
+					rez);
+			}
+
+			if(test >= 0.0)
+			{
+				binGeneric(cs, xBegin + mx, xEnd, yBegin, yEnd, zBegin, zEnd,
+					rez);
+			}
 		}
-		ANKI_ASSERT(beginX >= 0 && beginX <= m_counts[0]);
 
-		// Find endX
-		x = box.getMax().x();
-		if(x <= info.m_xy.x())
+		return;
+	}
+
+	// Do the checks
+	Bool inside[2][2] = {{false, false}, {false, false}};
+
+	// Top looking plane check
+	{
+		// Pick the correct top looking plane (y)
+		const Plane& topPlane = m_planesYW[yBegin + my - 1];
+
+		F32 test = cs.testPlane(topPlane);
+		if(test < 0.0)
 		{
-			endX = 0;
+			inside[0][0] = inside[0][1] = true;
 		}
-		else if(x >= (info.m_xy.x() + info.m_sizes.x() * m_counts[0]))
+		else if(test > 0.0)
 		{
-			endX = m_counts[0];
+			inside[1][0] = inside[1][1] = true;
 		}
 		else
 		{
-			endX = ceil((x - info.m_xy.x()) / info.m_sizes.x());
+			// Possibly all inside
+			for(U i = 0; i < 2; i++)
+			{
+				for(U j = 0; j < 2; j++)
+				{
+					inside[i][j] = true;
+				}
+			}
 		}
-		ANKI_ASSERT(endX >= 0 && endX <= m_counts[0]);
+	}
 
-		// Find beginY
-		F32 y = box.getMin().y();
-		if(y <= info.m_xy.y())
+	// Right looking plane check
+	{
+		// Pick the correct right looking plane (x)
+		const Plane& rightPlane = m_planesXW[xBegin + mx - 1];
+
+		F32 test = cs.testPlane(rightPlane);
+		if(test < 0.0)
 		{
-			beginY = 0;
+			inside[0][1] = inside[1][1] = false;
 		}
-		else if(y >= (info.m_xy.y() + info.m_sizes.y() * m_counts[1]))
+		else if(test > 0.0)
 		{
-			beginY = m_counts[1];
+			inside[0][0] = inside[1][0] = false;
 		}
 		else
 		{
-			beginY = (y - info.m_xy.y()) / info.m_sizes.y();
+			// Do nothing and keep the top looking plane check results
 		}
-		ANKI_ASSERT(beginY >= 0 && beginY <= m_counts[1]);
+	}
+
+	// Now move lower to the hierarchy
+	if(inside[0][0])
+	{
+		binGeneric(cs, xBegin, xBegin + mx, yBegin, yBegin + my, zBegin, zEnd,
+			rez);
+	}
+
+	if(inside[0][1])
+	{
+		binGeneric(cs, xBegin + mx, xEnd, yBegin, yBegin + my, zBegin, zEnd,
+			rez);
+	}
+
+	if(inside[1][0])
+	{
+		binGeneric(cs, xBegin, xBegin + mx, yBegin + my, yEnd, zBegin, zEnd,
+			rez);
+	}
+
+	if(inside[1][1])
+	{
+		binGeneric(cs, xBegin + mx, xEnd, yBegin + my, yEnd, zBegin, zEnd,
+			rez);
+	}
+}
+
+//==============================================================================
+void Clusterer::update(U32 threadId, PtrSize threadsCount, Bool frustumChanged)
+{
+	PtrSize start, end;
+	const MoveComponent& move = m_node->getComponent<MoveComponent>();
+	const FrustumComponent& frc = *m_frc;
+	ANKI_ASSERT(frc.getFrustum().getType() == Frustum::Type::PERSPECTIVE);
+
+	const Transform& trf = move.getWorldTransform();
+	const Vec4& projParams = frc.getProjectionParameters();
+
+	if(frustumChanged)
+	{
+		// Re-calculate the planes in local space
 
-		// Find endY
-		y = box.getMax().y();
-		if(y <= info.m_xy.y())
+		// First the top looking planes
+		ThreadPool::Task::choseStartEnd(threadId, threadsCount,
+			m_planesYW.getSize(), start, end);
+
+		for(U i = start; i < end; i++)
 		{
-			endY = 0;
+			calcPlaneY(i, projParams);
+
+			m_planesYW[i] = m_planesY[i].getTransformed(trf);
 		}
-		else if(y >= (info.m_xy.y() + info.m_sizes.y() * m_counts[1]))
+
+		// Then the right looking planes
+		ThreadPool::Task::choseStartEnd(threadId, threadsCount,
+			m_planesXW.getSize(), start, end);
+
+		for(U j = start; j < end; j++)
 		{
-			endY = m_counts[1];
+			calcPlaneX(j, projParams);
+
+			m_planesXW[j] = m_planesX[j].getTransformed(trf);
 		}
-		else
+	}
+	else
+	{
+		// Only transform planes
+
+		// First the top looking planes
+		ThreadPool::Task::choseStartEnd(threadId, threadsCount,
+			m_planesYW.getSize(), start, end);
+
+		for(U i = start; i < end; i++)
 		{
-			endY = ceil((y - info.m_xy.y()) / info.m_sizes.y());
+			m_planesYW[i] = m_planesY[i].getTransformed(trf);
 		}
-		ANKI_ASSERT(endY >= 0 && endY <= m_counts[1]);
 
-		for(I y = beginY; y < endY; ++y)
+		// Then the right looking planes
+		ThreadPool::Task::choseStartEnd(threadId, threadsCount,
+			m_planesXW.getSize(), start, end);
+
+		for(U j = start; j < end; j++)
 		{
-			for(I x = beginX; x < endX; ++x)
-			{
-				const Cluster& cl = cluster(x, y, z);
-				Aabb clusterAabb(cl.m_min.xyz0(), cl.m_max.xyz0());
-				if(testCollisionShapes(cs, clusterAabb))
-				{
-					Array<U8, 3> ids = {static_cast<U8>(x),
-						static_cast<U8>(y), static_cast<U8>(z)};
-					rez.m_clusterIds[rez.m_count++] = ids;
-				}
-			}
+			m_planesXW[j] = m_planesX[j].getTransformed(trf);
 		}
 	}
-}
 
-//==============================================================================
-void Clusterer::findSplitsFromAabb(const Aabb& box, U& zFrom, U& zTo) const
-{
-	zFrom = calcK(box.getMax().z());
-	zTo = calcK(box.getMin().z()) + 1;
-	ANKI_ASSERT(zFrom <= zTo);
+	// Finally transform the near and far planes
+	if(threadId == 0)
+	{
+		*m_nearPlane = Plane(Vec4(0.0, 0.0, -1.0, 0.0), m_near);
+		m_nearPlane->transform(trf);
+
+		*m_farPlane = Plane(Vec4(0.0, 0.0, 1.0, 0.0), -m_far);
+		m_farPlane->transform(trf);
+	}
 }
 
 } // end namespace anki

+ 4 - 3
testapp/Main.cpp

@@ -43,7 +43,7 @@ App* app;
 ModelNode* horse;
 PerspectiveCamera* cam;
 
-#define PLAYER 0
+#define PLAYER 1
 #define MOUSE 1
 
 Bool profile = false;
@@ -89,6 +89,7 @@ Error init()
 	cam->getComponent<MoveComponent>().
 		setLocalTransform(Transform(
 		Vec4(147.392776, -10.132728, 16.607138, 0.0),
+		//Vec4(0.0),
 		Mat3x4(Euler(toRad(0.0), toRad(90.0), toRad(0.0))),
 		1.0));
 #endif
@@ -103,7 +104,7 @@ Error init()
 	lightc->setDistance(5.0);
 
 	move = plight->tryGetComponent<MoveComponent>();
-	move->setLocalTransform(Transform(Vec4(0.0, 0.5, 0.0, 0.0),
+	move->setLocalTransform(Transform(Vec4(0.0, 0.0, 0.0, 0.0),
 		Mat3x4::getIdentity(), 1.0));
 #endif
 #if 0
@@ -491,7 +492,7 @@ Error initSubsystems(int argc, char* argv[])
 	config.set("samples", 1);
 	config.set("tessellation", true);
 	//config.set("maxTextureSize", 256);
-	config.set("fullscreenDesktopResolution", false);
+	config.set("fullscreenDesktopResolution", true);
 	config.set("debugContext", false);
 	if(getenv("ANKI_DATA_PATH"))
 	{