2
0
Эх сурвалжийг харах

Optimizing the shit out of the clusterer

Panagiotis Christopoulos Charitos 9 жил өмнө
parent
commit
b5282a582b

+ 1 - 1
src/anki/collision/CompoundShape.h

@@ -55,7 +55,7 @@ private:
 };
 
 template<typename TFunc>
-Error CompoundShape::iterateShapes(TFunc f) const
+inline Error CompoundShape::iterateShapes(TFunc f) const
 {
 	Error err = ErrorCode::NONE;
 	U count = 0;

+ 14 - 14
src/anki/collision/Frustum.cpp

@@ -115,7 +115,7 @@ void Frustum::updateInternal()
 }
 
 PerspectiveFrustum::PerspectiveFrustum()
-	: Frustum(Type::PERSPECTIVE)
+	: Frustum(FrustumType::PERSPECTIVE)
 {
 	addShape(&m_hull);
 	m_hull.initStorage(&m_pointsW[0], m_pointsW.getSize());
@@ -150,20 +150,20 @@ void PerspectiveFrustum::recalculate()
 
 	sinCos(PI + m_fovX / 2.0, s, c);
 	// right
-	m_planesL[PlaneType::RIGHT] = Plane(Vec4(c, 0.0, s, 0.0), 0.0);
+	m_planesL[FrustumPlaneType::RIGHT] = Plane(Vec4(c, 0.0, s, 0.0), 0.0);
 	// left
-	m_planesL[PlaneType::LEFT] = Plane(Vec4(-c, 0.0, s, 0.0), 0.0);
+	m_planesL[FrustumPlaneType::LEFT] = Plane(Vec4(-c, 0.0, s, 0.0), 0.0);
 
 	sinCos((PI + m_fovY) * 0.5, s, c);
 	// bottom
-	m_planesL[PlaneType::BOTTOM] = Plane(Vec4(0.0, s, c, 0.0), 0.0);
+	m_planesL[FrustumPlaneType::BOTTOM] = Plane(Vec4(0.0, s, c, 0.0), 0.0);
 	// top
-	m_planesL[PlaneType::TOP] = Plane(Vec4(0.0, -s, c, 0.0), 0.0);
+	m_planesL[FrustumPlaneType::TOP] = Plane(Vec4(0.0, -s, c, 0.0), 0.0);
 
 	// near
-	m_planesL[PlaneType::NEAR] = Plane(Vec4(0.0, 0.0, -1.0, 0.0), m_near);
+	m_planesL[FrustumPlaneType::NEAR] = Plane(Vec4(0.0, 0.0, -1.0, 0.0), m_near);
 	// far
-	m_planesL[PlaneType::FAR] = Plane(Vec4(0.0, 0.0, 1.0, 0.0), -m_far);
+	m_planesL[FrustumPlaneType::FAR] = Plane(Vec4(0.0, 0.0, 1.0, 0.0), -m_far);
 
 	// Points
 	//
@@ -185,7 +185,7 @@ Mat4 PerspectiveFrustum::calculateProjectionMatrix() const
 }
 
 OrthographicFrustum::OrthographicFrustum()
-	: Frustum(Type::ORTHOGRAPHIC)
+	: Frustum(FrustumType::ORTHOGRAPHIC)
 {
 	addShape(&m_obbW);
 }
@@ -210,13 +210,13 @@ Mat4 OrthographicFrustum::calculateProjectionMatrix() const
 void OrthographicFrustum::recalculate()
 {
 	// Planes
-	m_planesL[(U)PlaneType::LEFT] = Plane(Vec4(1.0, 0.0, 0.0, 0.0), m_left);
-	m_planesL[(U)PlaneType::RIGHT] = Plane(Vec4(-1.0, 0.0, 0.0, 0.0), -m_right);
+	m_planesL[FrustumPlaneType::LEFT] = Plane(Vec4(1.0, 0.0, 0.0, 0.0), m_left);
+	m_planesL[FrustumPlaneType::RIGHT] = Plane(Vec4(-1.0, 0.0, 0.0, 0.0), -m_right);
 
-	m_planesL[(U)PlaneType::NEAR] = Plane(Vec4(0.0, 0.0, -1.0, 0.0), m_near);
-	m_planesL[(U)PlaneType::FAR] = Plane(Vec4(0.0, 0.0, 1.0, 0.0), -m_far);
-	m_planesL[(U)PlaneType::TOP] = Plane(Vec4(0.0, -1.0, 0.0, 0.0), -m_top);
-	m_planesL[(U)PlaneType::BOTTOM] = Plane(Vec4(0.0, 1.0, 0.0, 0.0), m_bottom);
+	m_planesL[FrustumPlaneType::NEAR] = Plane(Vec4(0.0, 0.0, -1.0, 0.0), m_near);
+	m_planesL[FrustumPlaneType::FAR] = Plane(Vec4(0.0, 0.0, 1.0, 0.0), -m_far);
+	m_planesL[FrustumPlaneType::TOP] = Plane(Vec4(0.0, -1.0, 0.0, 0.0), -m_top);
+	m_planesL[FrustumPlaneType::BOTTOM] = Plane(Vec4(0.0, 1.0, 0.0, 0.0), m_bottom);
 
 	// OBB
 	Vec4 c((m_right + m_left) * 0.5, (m_top + m_bottom) * 0.5, -(m_far + m_near) * 0.5, 0.0);

+ 29 - 24
src/anki/collision/Frustum.h

@@ -18,31 +18,31 @@ namespace anki
 /// @addtogroup Collision
 /// @{
 
+/// Frustum type. Passed to the Frustum constructor and returned by Frustum::getType().
+enum class FrustumType : U8
+{
+	PERSPECTIVE, ///< Passed to the base class by the PerspectiveFrustum constructor.
+	ORTHOGRAPHIC ///< Passed to the base class by the OrthographicFrustum constructor.
+};
+
+/// The 6 frustum planes. The enumerators are used as indices into the frustum plane arrays (m_planesL, m_planesW).
+enum class FrustumPlaneType : U8
+{
+	NEAR,
+	FAR,
+	LEFT,
+	RIGHT,
+	TOP,
+	BOTTOM,
+	COUNT ///< Number of planes. Also the size of the frustum plane arrays.
+};
+
 /// Frustum collision shape. This shape consists of 6 planes. The planes are used to find shapes that are
 /// inside the frustum
 class Frustum : public CompoundShape
 {
 public:
-	/// Frustum type
-	enum class Type : U8
-	{
-		PERSPECTIVE,
-		ORTHOGRAPHIC
-	};
-
-	/// The 6 frustum planes
-	enum class PlaneType : U8
-	{
-		NEAR,
-		FAR,
-		LEFT,
-		RIGHT,
-		TOP,
-		BOTTOM,
-		COUNT ///< Number of planes
-	};
-
-	Frustum(Type type)
+	Frustum(FrustumType type)
 		: m_type(type)
 	{
 	}
@@ -51,7 +51,7 @@ public:
 	{
 	}
 
-	Type getType() const
+	FrustumType getType() const
 	{
 		return m_type;
 	}
@@ -105,6 +105,11 @@ public:
 	/// Calculate the projection matrix
 	virtual Mat4 calculateProjectionMatrix() const = 0;
 
+	const Array<Plane, U(FrustumPlaneType::COUNT)>& getPlanesWorldSpace() const
+	{
+		return m_planesW; // Read-only access to m_planesW, which holds one Plane per FrustumPlaneType value
+	}
+
 protected:
 	/// @name Viewing variables
 	/// @{
@@ -113,8 +118,8 @@ protected:
 	/// @}
 
 	/// Used to check against the frustum
-	Array<Plane, (U)PlaneType::COUNT> m_planesL;
-	Array<Plane, (U)PlaneType::COUNT> m_planesW;
+	Array<Plane, U(FrustumPlaneType::COUNT)> m_planesL;
+	Array<Plane, U(FrustumPlaneType::COUNT)> m_planesW;
 
 	/// Keep the transformation.
 	Transform m_trf = Transform::getIdentity();
@@ -136,7 +141,7 @@ protected:
 	Frustum& operator=(const Frustum& b);
 
 private:
-	Type m_type;
+	FrustumType m_type;
 };
 
 /// Frustum shape for perspective cameras

+ 12 - 12
src/anki/collision/Functions.cpp

@@ -21,40 +21,40 @@ void extractClipPlanes(const Mat4& mvp, Array<Plane*, 6>& planes)
 	// Plane equation coefficients
 	Vec4 abcd;
 
-	if(planes[Frustum::PlaneType::NEAR])
+	if(planes[FrustumPlaneType::NEAR])
 	{
 		abcd = mvp.getRow(3) + mvp.getRow(2);
-		setPlane(abcd, *planes[Frustum::PlaneType::NEAR]);
+		setPlane(abcd, *planes[FrustumPlaneType::NEAR]);
 	}
 
-	if(planes[Frustum::PlaneType::FAR])
+	if(planes[FrustumPlaneType::FAR])
 	{
 		abcd = mvp.getRow(3) - mvp.getRow(2);
-		setPlane(abcd, *planes[Frustum::PlaneType::FAR]);
+		setPlane(abcd, *planes[FrustumPlaneType::FAR]);
 	}
 
-	if(planes[Frustum::PlaneType::LEFT])
+	if(planes[FrustumPlaneType::LEFT])
 	{
 		abcd = mvp.getRow(3) + mvp.getRow(0);
-		setPlane(abcd, *planes[Frustum::PlaneType::LEFT]);
+		setPlane(abcd, *planes[FrustumPlaneType::LEFT]);
 	}
 
-	if(planes[Frustum::PlaneType::RIGHT])
+	if(planes[FrustumPlaneType::RIGHT])
 	{
 		abcd = mvp.getRow(3) - mvp.getRow(0);
-		setPlane(abcd, *planes[Frustum::PlaneType::RIGHT]);
+		setPlane(abcd, *planes[FrustumPlaneType::RIGHT]);
 	}
 
-	if(planes[Frustum::PlaneType::TOP])
+	if(planes[FrustumPlaneType::TOP])
 	{
 		abcd = mvp.getRow(3) - mvp.getRow(1);
-		setPlane(abcd, *planes[Frustum::PlaneType::TOP]);
+		setPlane(abcd, *planes[FrustumPlaneType::TOP]);
 	}
 
-	if(planes[Frustum::PlaneType::BOTTOM])
+	if(planes[FrustumPlaneType::BOTTOM])
 	{
 		abcd = mvp.getRow(3) + mvp.getRow(1);
-		setPlane(abcd, *planes[Frustum::PlaneType::BOTTOM]);
+		setPlane(abcd, *planes[FrustumPlaneType::BOTTOM]);
 	}
 }
 

+ 4 - 2
src/anki/collision/Sphere.cpp

@@ -41,6 +41,7 @@ Sphere Sphere::getTransformed(const Transform& trf) const
 
 	newSphere.m_center = trf.transform(m_center);
 	newSphere.m_radius = m_radius * trf.getScale();
+	newSphere.m_radiusSq = newSphere.m_radius * newSphere.m_radius;
 	return newSphere;
 }
 
@@ -136,6 +137,7 @@ void Sphere::setFromPointCloud(const void* buff, U count, PtrSize stride, PtrSiz
 	});
 
 	m_radius = sqrt(maxDist);
+	m_radiusSq = m_radius * m_radius;
 }
 
 Vec4 Sphere::computeSupport(const Vec4& dir) const
@@ -154,12 +156,12 @@ Bool Sphere::intersectsRay(
 	const Vec4& o = rayOrigin;
 	const Vec4& l = rayDir;
 	const Vec4& c = m_center;
-	F32 R = m_radius;
+	F32 R2 = m_radiusSq;
 
 	Vec4 o_c = o - c;
 
 	F32 a = l.dot(o_c);
-	F32 b = a * a - o_c.getLengthSquared() + R * R;
+	F32 b = a * a - o_c.getLengthSquared() + R2;
 
 	if(b < 0.0)
 	{

+ 7 - 7
src/anki/collision/Sphere.h

@@ -38,6 +38,7 @@ public:
 		: Base(CollisionShapeType::SPHERE)
 		, m_center(center)
 		, m_radius(radius)
+		, m_radiusSq(radius * radius)
 	{
 	}
 
@@ -46,11 +47,6 @@ public:
 		return m_center;
 	}
 
-	Vec4& getCenter()
-	{
-		return m_center;
-	}
-
 	void setCenter(const Vec4& x)
 	{
 		m_center = x;
@@ -61,14 +57,16 @@ public:
 		return m_radius;
 	}
 
-	F32& getRadius()
+	F32 getRadiusSquared() const
 	{
-		return m_radius;
+		ANKI_ASSERT(m_radiusSq == m_radius * m_radius);
+		return m_radiusSq;
 	}
 
 	void setRadius(const F32 x)
 	{
 		m_radius = x;
+		m_radiusSq = x * x; // Keep the cached square in sync, getRadiusSquared() asserts that it matches m_radius
 	}
 
 	Sphere& operator=(const Sphere& b)
@@ -76,6 +74,7 @@ public:
 		Base::operator=(b);
 		m_center = b.m_center;
 		m_radius = b.m_radius;
+		m_radiusSq = b.m_radiusSq;
 		return *this;
 	}
 
@@ -121,6 +120,7 @@ public:
 private:
 	Vec4 m_center;
 	F32 m_radius;
+	F32 m_radiusSq;
 };
 /// @}
 

+ 14 - 7
src/anki/collision/Tests.cpp

@@ -101,8 +101,17 @@ static Bool test(const Aabb& aabb, const Sphere& s)
 {
 	const Vec4& c = s.getCenter();
 
-	// find the box's closest point to the sphere
-	Vec4 cp(0.0); // Closest Point
+// find the box's closest point to the sphere
+#if ANKI_SIMD == ANKI_SIMD_SSE
+	__m128 gt = _mm_cmpgt_ps(c.getSimd(), aabb.getMax().getSimd());
+	__m128 lt = _mm_cmplt_ps(c.getSimd(), aabb.getMin().getSimd());
+
+	__m128 m = _mm_or_ps(_mm_and_ps(gt, aabb.getMax().getSimd()), _mm_andnot_ps(gt, c.getSimd()));
+	__m128 n = _mm_or_ps(_mm_and_ps(lt, aabb.getMin().getSimd()), _mm_andnot_ps(lt, m));
+
+	Vec4 cp(n);
+#else
+	Vec4 cp(c); // Closest Point
 	for(U i = 0; i < 3; i++)
 	{
 		// if the center is greater than the max then the closest point is the max
@@ -116,18 +125,16 @@ static Bool test(const Aabb& aabb, const Sphere& s)
 		}
 		else
 		{
-			// the c lies between min and max
-			cp[i] = c[i];
+			// the c lies between min and max, do nothing
 		}
 	}
-
-	F32 rsq = s.getRadius() * s.getRadius();
+#endif
 
 	// If c lies inside the box then cp equals c and sub is zero. Its length is then also zero and thus never greater
 	// than the squared radius
 	Vec4 sub = c - cp;
 
-	if(sub.getLengthSquared() <= rsq)
+	if(sub.getLengthSquared() <= s.getRadiusSquared())
 	{
 		return true;
 	}

+ 6 - 6
src/anki/event/LightEvent.cpp

@@ -16,14 +16,14 @@ Error LightEvent::init(F32 startTime, F32 duration, SceneNode* light)
 
 	LightComponent& lightc = light->getComponent<LightComponent>();
 
-	switch(lightc.getLightType())
+	switch(lightc.getLightComponentType())
 	{
-	case LightComponent::LightType::POINT:
+	case LightComponentType::POINT:
 	{
 		m_originalRadius = lightc.getRadius();
 	}
 	break;
-	case LightComponent::LightType::SPOT:
+	case LightComponentType::SPOT:
 		ANKI_ASSERT("TODO");
 		break;
 	default:
@@ -47,12 +47,12 @@ Error LightEvent::update(F32 prevUpdateTime, F32 crntTime)
 	// Update radius
 	if(m_radiusMultiplier != 0.0)
 	{
-		switch(lightc.getLightType())
+		switch(lightc.getLightComponentType())
 		{
-		case LightComponent::LightType::POINT:
+		case LightComponentType::POINT:
 			lightc.setRadius(m_originalRadius + factor * m_radiusMultiplier);
 			break;
-		case LightComponent::LightType::SPOT:
+		case LightComponentType::SPOT:
 			ANKI_ASSERT("TODO");
 			break;
 		default:

+ 1 - 1
src/anki/math/Functions.cpp

@@ -29,7 +29,7 @@ static void sinCosInternal(const Scalar a_, Scalar& sina, Scalar& cosa)
 		a = -a;
 		negative = true;
 	}
-	const Scalar TWO_OVER_PI = 1.0 / (getPi<Scalar>() / 2.0);
+	const Scalar TWO_OVER_PI = 1.0 / (PI / 2.0);
 	Scalar floatA = TWO_OVER_PI * a;
 	I intA = (int)floatA;
 

+ 50 - 0
src/anki/math/Mat4.h

@@ -410,6 +410,56 @@ public:
 
 		return m;
 	}
+
+	/// Compute 4 values that can be used to unproject a point from NDC to view space, starting from the parameters
+	/// that construct a perspective projection matrix.
+	/// @code
+	/// Vec4 unprojParams = calculatePerspectiveUnprojectionParams(...);
+	/// F32 z = unprojParams.z() / (unprojParams.w() + depth);
+	/// Vec2 xy = ndc.xy() * unprojParams.xy() * z;
+	/// Vec3 posViewSpace(xy, z);
+	/// @endcode
+	static TVec4<T> calculatePerspectiveUnprojectionParams(T fovX, T fovY, T near, T far)
+	{
+		const T nearMinusFar = near - far;
+		const T cotHalfFovY = T(1) / tan(fovY / T(2));
+
+		// The projection matrix elements that take part in the unprojection
+		const T m00 = cotHalfFovY * (fovY / fovX);
+		const T m11 = cotHalfFovY;
+		const T m22 = (far + near) / nearMinusFar;
+		const T m23 = (T(2) * far * near) / nearMinusFar;
+
+		TVec4<T> params;
+
+		// x and y: Pv.x = x' * w / m00 which becomes Pv.x = x' * Pv.z * (-1 / m00). The same holds for y with m11
+		params.x() = -T(1.0) / m00;
+		params.y() = -T(1.0) / m11;
+
+		// z and w: z' = (m * Pv) / 2 + 0.5 where Pv is the view space position. Solving that for Pv.z gives
+		// Pv.z = A / (z' + B) with A = (-m23 / 2) and B = (m22 / 2 - 0.5). A is stored in z and B in w
+		params.z() = -m23 * T(0.5);
+		params.w() = m22 * T(0.5) - T(0.5);
+
+		return params;
+	}
+
+	/// Same output as calculatePerspectiveUnprojectionParams but the values are read from this matrix, which is
+	/// assumed to be a perspective projection matrix.
+	TVec4<T> extractPerspectiveUnprojectionParams() const
+	{
+		const TMat4& proj = *this;
+
+		TVec4<T> params;
+		params.x() = -T(1.0) / proj(0, 0);
+		params.y() = -T(1.0) / proj(1, 1);
+		params.z() = -proj(2, 3) * T(0.5);
+		params.w() = proj(2, 2) * T(0.5) - T(0.5);
+		return params;
+	}
 	/// @}
 };
 

+ 3 - 0
src/anki/math/Vec4.h

@@ -212,6 +212,9 @@ TVec4<F32> TVec4<F32>::cross(const TVec4<F32>& b) const;
 template<>
 TVec4<F32> TVec4<F32>::Base::getAbs() const;
 
+template<>
+F32 TVec4<F32>::Base::getLengthSquared() const;
+
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 
 #error "TODO"

+ 8 - 0
src/anki/math/Vec4.inl.h

@@ -167,6 +167,14 @@ inline TVec4<F32> TVec4<F32>::Base::getAbs() const
 	return TVec4<F32>(_mm_andnot_ps(signMask, m_simd));
 }
 
+template<>
+inline F32 TVec4<F32>::Base::getLengthSquared() const
+{
+	// Mask 0xF1: multiply all 4 lanes and store the sum in the lowest lane only
+	const __m128 dotProduct = _mm_dp_ps(m_simd, m_simd, 0xF1);
+	return _mm_cvtss_f32(dotProduct);
+}
+
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 
 #error "TODO"

+ 331 - 133
src/anki/renderer/Clusterer.cpp

@@ -8,6 +8,7 @@
 #include <anki/scene/MoveComponent.h>
 #include <anki/scene/SceneNode.h>
 #include <anki/util/ThreadPool.h>
+#include <vector>
 
 namespace anki
 {
@@ -38,28 +39,21 @@ static Vec4 unproject(const F32 depth, const Vec2& ndc, const Vec4& projParams)
 	return view;
 }
 
-Clusterer::~Clusterer()
+static Vec4 unprojectZViewSpace(const F32 zVSpace, const Vec2& ndc, const Vec4& projParams)
 {
-	m_allPlanes.destroy(m_alloc);
+	Vec4 view;
+	view.x() = ndc.x() * projParams.x();
+	view.y() = ndc.y() * projParams.y();
+	view.z() = 1.0;
+	view.w() = 0.0;
+
+	return view * zVSpace;
 }
 
-void Clusterer::initDisk()
+Clusterer::~Clusterer()
 {
-	ANKI_ASSERT(m_disk.getSize() == 8 && "Assumes that");
-
-	// The radius is a little bit bigger than 1.0
-	F32 radius = 1.0 / cos(PI / 8.0);
-
-	for(U i = 0; i < 8; ++i)
-	{
-		F32 ang = (PI / 2.0) / 8.0;
-		ang = i * ang;
-
-		F32 x = cos(ang) * radius;
-		F32 y = sin(ang) * radius;
-
-		m_disk[i] = Vec4(x, y, 0.0, 0.0);
-	}
+	m_allPlanes.destroy(m_alloc);
+	m_clusterBoxes.destroy(m_alloc);
 }
 
 void Clusterer::initTestResults(const GenericMemoryPoolAllocator<U8>& alloc, ClustererTestResult& rez) const
@@ -114,6 +108,49 @@ void Clusterer::calcPlaneX(U j, const Vec4& projParams)
 	plane = Plane(n, 0.0);
 }
 
+/// Compute the AABBs of the clusters with linear index in [begin, end) and store them in m_clusterBoxes.
+/// @param projParams The unprojection parameters, only x and y are read.
+/// @param begin First cluster index to process.
+/// @param end One past the last cluster index to process.
+void Clusterer::setClusterBoxes(const Vec4& projParams, U begin, U end)
+{
+	// With even counts no tile has its two edges on different sides of NDC zero, the half tests below rely on that
+	ANKI_ASSERT((m_counts[0] % 2) == 0);
+	ANKI_ASSERT((m_counts[1] % 2) == 0);
+	const U clustersPerSlice = m_counts[0] * m_counts[1];
+
+	for(U idx = begin; idx < end; ++idx)
+	{
+		// Split the linear index to its 3 components
+		const U slice = idx / clustersPerSlice;
+		const U idxInSlice = idx % clustersPerSlice;
+		const U row = idxInSlice / m_counts[0];
+		const U col = idxInSlice % m_counts[0];
+
+		const F32 zMax = -calcNear(slice);
+		const F32 zMin = -calcNear(slice + 1);
+
+		// Tile edges in NDC. They are left in the type of the expression, rounding happens on the final assignment
+		const auto ndcXLo = F32(col) / m_counts[0] * 2.0 - 1.0;
+		const auto ndcXHi = F32(col + 1) / m_counts[0] * 2.0 - 1.0;
+		const auto ndcYLo = F32(row) / m_counts[1] * 2.0 - 1.0;
+		const auto ndcYHi = F32(row + 1) / m_counts[1] * 2.0 - 1.0;
+
+		// Which depth gives the extreme value depends on the half of the screen the tile belongs to
+		const Bool lowerHalfX = col < m_counts[0] / 2;
+		const F32 xMin = ndcXLo * projParams.x() * (lowerHalfX ? zMin : zMax);
+		const F32 xMax = ndcXHi * projParams.x() * (lowerHalfX ? zMax : zMin);
+
+		const Bool lowerHalfY = row < m_counts[1] / 2;
+		const F32 yMin = ndcYLo * projParams.y() * (lowerHalfY ? zMin : zMax);
+		const F32 yMax = ndcYHi * projParams.y() * (lowerHalfY ? zMax : zMin);
+
+		m_clusterBoxes[idx] = Aabb(Vec4(xMin, yMin, zMin, 0.0), Vec4(xMax, yMax, zMax, 0.0));
+	}
+}
+
 void Clusterer::init(const GenericMemoryPoolAllocator<U8>& alloc, U clusterCountX, U clusterCountY, U clusterCountZ)
 {
 	m_alloc = alloc;
@@ -150,22 +187,23 @@ void Clusterer::init(const GenericMemoryPoolAllocator<U8>& alloc, U clusterCount
 	++count;
 
 	ANKI_ASSERT(count == m_allPlanes.getSize());
+
+	m_clusterBoxes.create(m_alloc, m_counts[0] * m_counts[1] * m_counts[2]);
 }
 
-void Clusterer::prepare(ThreadPool& threadPool, const FrustumComponent& frc)
+void Clusterer::prepare(ThreadPool& threadPool, const ClustererPrepareInfo& inf)
 {
-	// Get some things
-	Timestamp frcTimestamp = frc.getTimestamp();
-	const Frustum& fr = frc.getFrustum();
-	ANKI_ASSERT(fr.getType() == Frustum::Type::PERSPECTIVE);
-	const PerspectiveFrustum& pfr = static_cast<const PerspectiveFrustum&>(fr);
-
-	// Set some things
-	const SceneNode* node = m_node; // Save for later compare
-	m_node = &frc.getSceneNode();
-	m_frc = &frc;
-	m_near = pfr.getNear();
-	m_far = pfr.getFar();
+	Bool frustumChanged = m_projMat != inf.m_projMat;
+
+	// Compute cached values
+	m_projMat = inf.m_projMat;
+	m_viewMat = inf.m_viewMat;
+	m_camTrf = inf.m_camTrf;
+
+	m_unprojParams = m_projMat.extractPerspectiveUnprojectionParams();
+	m_near = -m_unprojParams.z() / (m_unprojParams.w() + 0.0);
+	m_far = -m_unprojParams.z() / (m_unprojParams.w() + 1.0);
+	ANKI_ASSERT(m_near < m_far && m_near > 0.0);
 	m_calcNearOpt = (m_far - m_near) / pow(m_counts[2], 2.0);
 	m_shaderMagicVal = -1.0 / m_calcNearOpt;
 
@@ -174,11 +212,6 @@ void Clusterer::prepare(ThreadPool& threadPool, const FrustumComponent& frc)
 	//
 	Array<UpdatePlanesPerspectiveCameraTask, ThreadPool::MAX_THREADS> jobs;
 
-	// Do a job that transforms only the planes when:
-	// - it's the same frustum component as before and
-	// - the component has not changed
-	Bool frustumChanged = frcTimestamp > m_planesLSpaceTimestamp || m_node != node;
-
 	for(U i = 0; i < threadPool.getThreadsCount(); i++)
 	{
 		jobs[i].m_clusterer = this;
@@ -186,12 +219,6 @@ void Clusterer::prepare(ThreadPool& threadPool, const FrustumComponent& frc)
 		threadPool.assignNewTask(i, &jobs[i]);
 	}
 
-	// Update timestamp
-	if(frustumChanged)
-	{
-		m_planesLSpaceTimestamp = frcTimestamp;
-	}
-
 	// Sync threads
 	Error err = threadPool.waitForAllThreadsToFinish();
 	(void)err;
@@ -201,7 +228,7 @@ void Clusterer::computeSplitRange(const CollisionShape& cs, U& zBegin, U& zEnd)
 {
 	// Find the distance between cs and near plane
 	F32 dist = cs.testPlane(*m_nearPlane);
-	dist = max(0.0f, dist);
+	dist = max(m_near, dist);
 
 	// Find split
 	zBegin = calcZ(-dist);
@@ -227,9 +254,7 @@ void Clusterer::bin(const CollisionShape& cs, const Aabb& csBox, ClustererTestRe
 	}
 	else
 	{
-		U zBegin, zEnd;
-		computeSplitRange(cs, zBegin, zEnd);
-		binGeneric(cs, 0, m_counts[0], 0, m_counts[1], zBegin, zEnd, rez);
+		binGeneric(cs, csBox, rez);
 	}
 }
 
@@ -247,26 +272,8 @@ void Clusterer::totallyInsideAllTiles(U zBegin, U zEnd, ClustererTestResult& rez
 	}
 }
 
-void Clusterer::binSphere(const Sphere& s, const Aabb& aabb, ClustererTestResult& rez) const
+void Clusterer::quickReduction(const Aabb& aabb, const Mat4& mvp, U& xBegin, U& xEnd, U& yBegin, U& yEnd) const
 {
-	const Mat4& vp = m_frc->getViewProjectionMatrix();
-	const Mat4& v = m_frc->getViewMatrix();
-
-	const Vec4& scent = s.getCenter();
-	const F32 srad = s.getRadius();
-
-	U zBegin, zEnd;
-	computeSplitRange(s, zBegin, zEnd);
-
-	// Do a quick check
-	Vec4 eye = m_frc->getFrustumOrigin() - scent;
-	if(ANKI_UNLIKELY(eye.getLengthSquared() <= srad * srad))
-	{
-		// Camera totaly inside the sphere
-		totallyInsideAllTiles(zBegin, zEnd, rez);
-		return;
-	}
-
 	// Compute projection points
 	const Vec4& minv = aabb.getMin();
 	const Vec4& maxv = aabb.getMax();
@@ -282,14 +289,8 @@ void Clusterer::binSphere(const Sphere& s, const Aabb& aabb, ClustererTestResult
 	Vec2 min2(MAX_F32), max2(MIN_F32);
 	for(Vec4& p : points)
 	{
-		p = vp * p;
-		if(p.w() <= 0.0)
-		{
-			// This point is behind the near plane. It's a big hustle to properly clip it. Mark the shape totally inside
-			totallyInsideAllTiles(zBegin, zEnd, rez);
-			return;
-		}
-
+		p = mvp * p;
+		ANKI_ASSERT(p.w() > 0.0 && "Should have cliped tha aabb before calling this");
 		p = p.perspectiveDivide();
 
 		for(U i = 0; i < 2; ++i)
@@ -302,82 +303,187 @@ void Clusterer::binSphere(const Sphere& s, const Aabb& aabb, ClustererTestResult
 	min2 = min2 * 0.5 + 0.5;
 	max2 = max2 * 0.5 + 0.5;
 
-	// Do a box test
-	F32 tcountX = m_counts[0];
-	F32 tcountY = m_counts[1];
+	// Compute ranges
+	xBegin = clamp<F32>(floor(m_counts[0] * min2.x()), 0.0, m_counts[0]);
+	xEnd = min<F32>(ceil(m_counts[0] * max2.x()), m_counts[0]);
+	yBegin = clamp<F32>(floor(m_counts[1] * min2.y()), 0, m_counts[1]);
+	yEnd = min<F32>(ceil(m_counts[1] * max2.y()), m_counts[1]);
+
+	ANKI_ASSERT(xBegin < m_counts[0] && xEnd <= m_counts[0]);
+	ANKI_ASSERT(yBegin < m_counts[1] && yEnd <= m_counts[1]);
+}
 
-	I xBegin = floor(tcountX * min2.x());
-	xBegin = clamp<I>(xBegin, 0, m_counts[0]);
+template<typename TFunc>
+void Clusterer::boxReduction(
+	U xBegin, U xEnd, U yBegin, U yEnd, U zBegin, U zEnd, ClustererTestResult& rez, TFunc func) const
+{
+	U zcount = zEnd - zBegin;
+	U ycount = yEnd - yBegin;
+	U xcount = xEnd - xBegin;
 
-	I xEnd = ceil(tcountX * max2.x());
-	xEnd = min<U>(xEnd, m_counts[0]);
+	if(xcount > ycount && xcount > zcount)
+	{
+		for(U z = zBegin; z < zEnd; ++z)
+		{
+			const U zc = (m_counts[0] * m_counts[1]) * z;
 
-	I yBegin = floor(tcountY * min2.y());
-	yBegin = clamp<I>(yBegin, 0, m_counts[1]);
+			for(U y = yBegin; y < yEnd; ++y)
+			{
+				const U yc = zc + m_counts[0] * y;
 
-	I yEnd = ceil(tcountY * max2.y());
-	yEnd = min<I>(yEnd, m_counts[1]);
+				// Do a reduction to avoid some checks
+				U firstX = MAX_U;
 
-	ANKI_ASSERT(xBegin >= 0 && xBegin <= tcountX && xEnd >= 0 && xEnd <= tcountX);
-	ANKI_ASSERT(yBegin >= 0 && yBegin <= tcountX && yEnd >= 0 && yBegin <= tcountY);
+				for(U x = xBegin; x < xEnd; ++x)
+				{
+					U i = yc + x;
 
-	Vec2 tileSize(1.0 / tcountX, 1.0 / tcountY);
+					if(func(m_clusterBoxes[i]))
+					{
+						firstX = x;
+						break;
+					}
+				}
 
-	Vec4 a = vp * s.getCenter().xyz1();
-	Vec2 c = (a.w() != 0.0) ? (a.xy() / a.w()) : a.xy();
-	c = c * 0.5 + 0.5;
+				for(U x = xEnd - 1; x >= firstX; --x)
+				{
+					U i = yc + x;
 
-	Vec4 sphereCenterVSpace = (v * scent.xyz1()).xyz0();
+					if(func(m_clusterBoxes[i]))
+					{
+						for(U a = firstX; a <= x; ++a)
+						{
+							rez.pushBack(a, y, z);
+						}
 
-	for(I y = yBegin; y < yEnd; ++y)
+						break;
+					}
+				}
+			}
+		}
+	}
+	else if(ycount > xcount && ycount > zcount)
 	{
-		for(I x = xBegin; x < xEnd; ++x)
+		for(U z = zBegin; z < zEnd; ++z)
 		{
-			// Do detailed tests
+			const U zc = (m_counts[0] * m_counts[1]) * z;
 
-			Vec2 tileMin = Vec2(x, y) * tileSize;
-			Vec2 tileMax = Vec2(x + 1, y + 1) * tileSize;
-
-			// Find closest point of sphere center and tile
-			Vec2 cp(0.0);
-			for(U i = 0; i < 2; ++i)
+			for(U x = xBegin; x < xEnd; ++x)
 			{
-				if(c[i] > tileMax[i])
-				{
-					cp[i] = tileMax[i];
-				}
-				else if(c[i] < tileMin[i])
+				// Do a reduction to avoid some checks
+				U firstY = MAX_U;
+
+				for(U y = yBegin; y < yEnd; ++y)
 				{
-					cp[i] = tileMin[i];
+					const U i = zc + m_counts[0] * y + x;
+
+					if(func(m_clusterBoxes[i]))
+					{
+						firstY = y;
+						break;
+					}
 				}
-				else
+
+				for(U y = yEnd - 1; y >= firstY; --y)
 				{
-					// the c lies between min and max
-					cp[i] = c[i];
+					const U i = zc + m_counts[0] * y + x;
+
+					if(func(m_clusterBoxes[i]))
+					{
+						for(U a = firstY; a <= y; ++a)
+						{
+							rez.pushBack(x, a, z);
+						}
+
+						break;
+					}
 				}
 			}
+		}
+	}
+	else
+	{
+		for(U y = yBegin; y < yEnd; ++y)
+		{
+			for(U x = xBegin; x < xEnd; ++x)
+			{
+				// Do a reduction to avoid some checks
+				U firstZ = MAX_U;
 
-			// Unproject the closest point to view space
-			Vec4 view = unproject(1.0, cp * 2.0 - 1.0, m_frc->getProjectionParameters());
+				for(U z = zBegin; z < zEnd; ++z)
+				{
+					const U i = (m_counts[0] * m_counts[1]) * z + m_counts[0] * y + x;
 
-			// Do a simple ray-sphere test
-			Vec4 dir = view;
-			Vec4 proj = sphereCenterVSpace.projectTo(dir);
-			F32 lenSq = (sphereCenterVSpace - proj).getLengthSquared();
-			Bool inside = lenSq <= (srad * srad);
+					if(func(m_clusterBoxes[i]))
+					{
+						firstZ = z;
+						break;
+					}
+				}
 
-			if(inside)
-			{
-				for(U z = zBegin; z < zEnd; ++z)
+				for(U z = zEnd - 1; z >= firstZ; --z)
 				{
-					rez.pushBack(x, y, z);
+					const U i = (m_counts[0] * m_counts[1]) * z + m_counts[0] * y + x;
+
+					if(func(m_clusterBoxes[i]))
+					{
+						for(U a = firstZ; a <= z; ++a)
+						{
+							rez.pushBack(x, y, a);
+						}
+
+						break;
+					}
 				}
 			}
 		}
 	}
 }
 
-void Clusterer::binGeneric(
+/// Bin a sphere. The sphere is moved to view space, a coarse cluster range is computed from its clipped AABB and
+/// then every cluster box in that range is tested against the sphere.
+/// @param s The sphere. It is passed as is to computeSplitRange.
+/// @param aabb Not read by this method, the AABB is recomputed from the view space sphere.
+/// @param rez The clusters that pass the test are appended here.
+void Clusterer::binSphere(const Sphere& s, const Aabb& aabb, ClustererTestResult& rez) const
+{
+	// Move the sphere to view space
+	Vec4 cVSpace = (m_viewMat * s.getCenter().xyz1()).xyz0();
+	Sphere sphere(cVSpace, s.getRadius());
+
+	// Compute a new AABB and clip it
+	Aabb box;
+	sphere.computeAabb(box);
+
+	// Compare xyz only, like binGeneric and binPerspectiveFrustum do. NOTE(review): the boxes built in this file
+	// have w == 0 in both min and max, so a comparison that includes w presumably can never pass - confirm against
+	// the vector operator>
+	ANKI_ASSERT(box.getMax().xyz() > box.getMin().xyz());
+
+	// Cap the max z so that quickReduction does not get points with w <= 0 (it asserts on that)
+	F32 maxz = min(box.getMax().z(), -m_near - EPSILON);
+	box.setMax(Vec4(box.getMax().xy(), maxz, 0.0));
+
+	// Quick reduction
+	U xBegin, xEnd, yBegin, yEnd, zBegin, zEnd;
+	computeSplitRange(s, zBegin, zEnd);
+	quickReduction(box, m_projMat, xBegin, xEnd, yBegin, yEnd);
+
+	// Detailed. The lambda parameter has its own name to avoid shadowing the aabb parameter of this method
+	boxReduction(xBegin, xEnd, yBegin, yEnd, zBegin, zEnd, rez, [&](const Aabb& clusterBox) {
+		return testCollisionShapes(sphere, clusterBox);
+	});
+}
+
+/// Bin a generic collision shape. The AABB of the shape gives a coarse cluster range and the recursive plane tests
+/// refine it.
+/// @param cs The shape to bin.
+/// @param box0 The AABB of the shape before the view matrix is applied.
+/// @param rez The output.
+void Clusterer::binGeneric(const CollisionShape& cs, const Aabb& box0, ClustererTestResult& rez) const
+{
+	// Bring the box to view space
+	const Transform viewTrf(m_viewMat);
+	Aabb vspaceBox = box0.getTransformed(viewTrf);
+
+	// Cap the max z of the box before it gets projected
+	const Vec4 unclippedMax = vspaceBox.getMax();
+	const F32 clippedZ = min(unclippedMax.z(), -m_near - EPSILON);
+	vspaceBox.setMax(Vec4(unclippedMax.xy(), clippedZ, 0.0));
+	ANKI_ASSERT(vspaceBox.getMax().xyz() > vspaceBox.getMin().xyz());
+
+	// Coarse ranges, first in depth and then on screen
+	U zBegin, zEnd;
+	computeSplitRange(cs, zBegin, zEnd);
+
+	U xBegin, xEnd, yBegin, yEnd;
+	quickReduction(vspaceBox, m_projMat, xBegin, xEnd, yBegin, yEnd);
+
+	// Fine grained tests
+	binGenericRecursive(cs, xBegin, xEnd, yBegin, yEnd, zBegin, zEnd, rez);
+}
+
+void Clusterer::binGenericRecursive(
 	const CollisionShape& cs, U xBegin, U xEnd, U yBegin, U yEnd, U zBegin, U zEnd, ClustererTestResult& rez) const
 {
 	U my = (yEnd - yBegin) / 2;
@@ -403,12 +509,12 @@ void Clusterer::binGeneric(
 
 			if(test <= 0.0)
 			{
-				binGeneric(cs, xBegin, xEnd, yBegin, yBegin + my, zBegin, zEnd, rez);
+				binGenericRecursive(cs, xBegin, xEnd, yBegin, yBegin + my, zBegin, zEnd, rez);
 			}
 
 			if(test >= 0.0)
 			{
-				binGeneric(cs, xBegin, xEnd, yBegin + my, yEnd, zBegin, zEnd, rez);
+				binGenericRecursive(cs, xBegin, xEnd, yBegin + my, yEnd, zBegin, zEnd, rez);
 			}
 		}
 		else
@@ -418,12 +524,12 @@ void Clusterer::binGeneric(
 
 			if(test <= 0.0)
 			{
-				binGeneric(cs, xBegin, xBegin + mx, yBegin, yEnd, zBegin, zEnd, rez);
+				binGenericRecursive(cs, xBegin, xBegin + mx, yBegin, yEnd, zBegin, zEnd, rez);
 			}
 
 			if(test >= 0.0)
 			{
-				binGeneric(cs, xBegin + mx, xEnd, yBegin, yEnd, zBegin, zEnd, rez);
+				binGenericRecursive(cs, xBegin + mx, xEnd, yBegin, yEnd, zBegin, zEnd, rez);
 			}
 		}
 
@@ -483,33 +589,67 @@ void Clusterer::binGeneric(
 	// Now move lower to the hierarchy
 	if(inside[0][0])
 	{
-		binGeneric(cs, xBegin, xBegin + mx, yBegin, yBegin + my, zBegin, zEnd, rez);
+		binGenericRecursive(cs, xBegin, xBegin + mx, yBegin, yBegin + my, zBegin, zEnd, rez);
 	}
 
 	if(inside[0][1])
 	{
-		binGeneric(cs, xBegin + mx, xEnd, yBegin, yBegin + my, zBegin, zEnd, rez);
+		binGenericRecursive(cs, xBegin + mx, xEnd, yBegin, yBegin + my, zBegin, zEnd, rez);
 	}
 
 	if(inside[1][0])
 	{
-		binGeneric(cs, xBegin, xBegin + mx, yBegin + my, yEnd, zBegin, zEnd, rez);
+		binGenericRecursive(cs, xBegin, xBegin + mx, yBegin + my, yEnd, zBegin, zEnd, rez);
 	}
 
 	if(inside[1][1])
 	{
-		binGeneric(cs, xBegin + mx, xEnd, yBegin + my, yEnd, zBegin, zEnd, rez);
+		binGenericRecursive(cs, xBegin + mx, xEnd, yBegin + my, yEnd, zBegin, zEnd, rez);
+	}
+}
+
+void Clusterer::binPerspectiveFrustum(const PerspectiveFrustum& fr, const Aabb& box0, ClustererTestResult& rez) const
+{ // Bins a perspective frustum: coarse range from its AABB, then every cluster box is tested against 5 of its planes
+	rez.m_count = 0; // Start from an empty result
+
+	// Move the box to view space
+	Aabb box = box0.getTransformed(Transform(m_viewMat));
+
+	F32 maxz = min(box.getMax().z(), -m_near - EPSILON); // Cap the max z at -m_near - EPSILON
+	box.setMax(Vec4(box.getMax().xy(), maxz, 0.0));
+	ANKI_ASSERT(box.getMax().xyz() > box.getMin().xyz());
+
+	// Quick reduction: a z range from the frustum itself and x/y ranges from the clipped box
+	U xBegin, xEnd, yBegin, yEnd, zBegin, zEnd;
+	computeSplitRange(fr, zBegin, zEnd);
+	quickReduction(box, m_projMat, xBegin, xEnd, yBegin, yEnd);
+
+	// Detailed tests against the planes of the frustum, transformed with the view matrix
+	Array<Plane, 5> vspacePlanes;
+	for(U i = 0; i < vspacePlanes.getSize(); ++i)
+	{
+		vspacePlanes[i] = fr.getPlanesWorldSpace()[i + 1].getTransformed(Transform(m_viewMat)); // i + 1 skips index 0 (FrustumPlaneType::NEAR)
 	}
+
+	boxReduction(xBegin, xEnd, yBegin, yEnd, zBegin, zEnd, rez, [&](const Aabb& aabb) {
+		for(const Plane& p : vspacePlanes)
+		{
+			if(aabb.testPlane(p) < 0.0) // NOTE(review): presumably negative means the box is completely behind the plane - confirm
+			{
+				return false; // One failing plane is enough to reject the cluster box
+			}
+		}
+
+		return true; // No plane gave a negative result
+	});
 }
 
 void Clusterer::update(U32 threadId, PtrSize threadsCount, Bool frustumChanged)
 {
 	PtrSize start, end;
-	const FrustumComponent& frc = *m_frc;
-	ANKI_ASSERT(frc.getFrustum().getType() == Frustum::Type::PERSPECTIVE);
 
-	const Transform& trf = frc.getFrustum().getTransform();
-	const Vec4& projParams = frc.getProjectionParameters();
+	const Transform& trf = m_camTrf;
+	const Vec4& projParams = m_unprojParams;
 
 	if(frustumChanged)
 	{
@@ -534,6 +674,10 @@ void Clusterer::update(U32 threadId, PtrSize threadsCount, Bool frustumChanged)
 
 			m_planesXW[j] = m_planesX[j].getTransformed(trf);
 		}
+
+		// The boxes
+		ThreadPoolTask::choseStartEnd(threadId, threadsCount, m_clusterBoxes.getSize(), start, end);
+		setClusterBoxes(projParams, start, end);
 	}
 	else
 	{
@@ -567,4 +711,58 @@ void Clusterer::update(U32 threadId, PtrSize threadsCount, Bool frustumChanged)
 	}
 }
 
+void Clusterer::debugDraw(ClustererDebugDrawer& drawer) const
+{ // The body is empty in this revision: nothing is drawn and drawer is not used
+}
+
+/// Draw the wireframe of every cluster that is stored in a test result. The 8 corners of a cluster are unprojected
+/// to view space and its 12 edges are sent to the drawer one by one.
+/// @param rez The result to visualize.
+/// @param drawer It's called once per edge.
+void Clusterer::debugDrawResult(const ClustererTestResult& rez, ClustererDebugDrawer& drawer) const
+{
+	// The corners that form the 12 edges. Corner index is z * 4 + y * 2 + x with x, y, z being 0 or 1
+	static const U EDGES[12][2] = {
+		{0, 1}, {1, 3}, {3, 2}, {0, 2}, {4, 5}, {5, 7}, {7, 6}, {4, 6}, {0, 4}, {1, 5}, {2, 6}, {3, 7}};
+
+	for(auto it = rez.getClustersBegin(), end = rez.getClustersEnd(); it != end; ++it)
+	{
+		const auto& cluster = *it;
+
+		// Gather the corners
+		Array<Vec3, 8> corners;
+		U cornerCount = 0;
+
+		for(U dz = 0; dz < 2; ++dz)
+		{
+			const F32 zVSpace = -calcNear(cluster.z() + dz);
+
+			for(U dy = 0; dy < 2; ++dy)
+			{
+				const F32 yNdc = F32(cluster.y() + dy) / m_counts[1] * 2.0 - 1.0;
+
+				for(U dx = 0; dx < 2; ++dx)
+				{
+					const F32 xNdc = F32(cluster.x() + dx) / m_counts[0] * 2.0 - 1.0;
+
+					corners[cornerCount++] = unprojectZViewSpace(zVSpace, Vec2(xNdc, yNdc), m_unprojParams).xyz();
+				}
+			}
+		}
+
+		ANKI_ASSERT(cornerCount == 8);
+
+		// Draw the edges
+		static const Vec3 COLOR(1.0);
+		for(const auto& edge : EDGES)
+		{
+			drawer(corners[edge[0]], corners[edge[1]], COLOR);
+		}
+	}
+}
+
 } // end namespace anki

+ 94 - 22
src/anki/renderer/Clusterer.h

@@ -17,10 +17,56 @@ namespace anki
 class FrustumComponent;
 class SceneNode;
 class ThreadPool;
+class PerspectiveFrustum;
 
 /// @addtogroup renderer
 /// @{
 
+class ClustererDebugDrawer // Callback interface through which the Clusterer debug-draw methods emit lines.
+{
+public:
+	virtual ~ClustererDebugDrawer() = default;
+
+	virtual void operator()(const Vec3& lineA, const Vec3& lineB, const Vec3& color) = 0; ///< Receives one line segment (its two end points) and a color.
+};
+
+class alignas(alignof(U32)) Cluster // Coordinates of one cluster packed into 32 bits: x in bits 0-7, y in bits 8-15, z in bits 16-23.
+{
+	friend class Clusterer;
+	friend class ClustererTestResult;
+
+public:
+	U x() const // X coordinate. Decoded from m_u32 so it matches the packed write on any byte order.
+	{
+		return m_u32 & 0xFFu;
+	}
+
+	U y() const // Y coordinate, stored in bits 8-15 of m_u32.
+	{
+		return (m_u32 >> 8) & 0xFFu;
+	}
+
+	U z() const // Z coordinate, stored in bits 16-23 of m_u32.
+	{
+		return (m_u32 >> 16) & 0xFFu;
+	}
+
+private:
+	union
+	{
+		struct
+		{
+			U8 m_x;
+			U8 m_y;
+			U8 m_z;
+			U8 m_pad_; ///< Unused; keeps the struct the same size as m_u32.
+		} m_v; ///< Per-byte view. Only lines up with the accessors on little-endian targets.
+		U32 m_u32; ///< Packed view; the accessors above read this member.
+	};
+};
+
+static_assert(sizeof(Cluster) == sizeof(U32), "Wrong size");
+
 /// The result of the cluster tests.
 class ClustererTestResult
 {
@@ -36,12 +82,12 @@ public:
 		m_clusterIds.destroy(m_alloc);
 	}
 
-	DynamicArray<Array<U8, 3>>::ConstIterator getClustersBegin() const
+	DynamicArray<Cluster>::ConstIterator getClustersBegin() const
 	{
 		return m_clusterIds.getBegin();
 	}
 
-	DynamicArray<Array<U8, 3>>::ConstIterator getClustersEnd() const
+	DynamicArray<Cluster>::ConstIterator getClustersEnd() const
 	{
 		return m_clusterIds.getBegin() + m_count;
 	}
@@ -52,20 +98,34 @@ public:
 	}
 
 private:
-	DynamicArray<Array<U8, 3>> m_clusterIds;
-	U32 m_count = 0;
 	GenericMemoryPoolAllocator<U8> m_alloc;
+	DynamicArray<Cluster> m_clusterIds;
+	U32 m_count = 0;
 
 	void pushBack(U x, U y, U z)
 	{
 		ANKI_ASSERT(x <= 0xFF && y <= 0xFF && z <= 0xFF);
-		m_clusterIds[m_count][0] = U8(x);
-		m_clusterIds[m_count][1] = U8(y);
-		m_clusterIds[m_count][2] = U8(z);
+#if 1
+		m_clusterIds[m_count].m_u32 = (z << 16) | (y << 8) | x;
+		ANKI_ASSERT(m_clusterIds[m_count].x() == x && m_clusterIds[m_count].y() == y && m_clusterIds[m_count].z() == z);
+#else
+		m_clusterIds[m_count].m_v.m_x = U8(x);
+		m_clusterIds[m_count].m_v.m_y = U8(y);
+		m_clusterIds[m_count].m_v.m_z = U8(z);
+#endif
 		++m_count;
 	}
 };
 
+/// Info that will prepare the clusterer.
+class ClustererPrepareInfo
+{
+public:
+	Mat4 m_viewMat; ///< View matrix. The unit test in this change builds it as the inverse of m_camTrf.
+	Mat4 m_projMat; ///< Must be perspective projection.
+	Transform m_camTrf; ///< Camera transform. The renderer callers in this change pass the frustum's transform.
+};
+
 /// Collection of clusters for visibility tests.
 class Clusterer
 {
@@ -81,13 +141,16 @@ public:
 	void init(const GenericMemoryPoolAllocator<U8>& alloc, U clusterCountX, U clusterCountY, U clusterCountZ);
 
 	/// Prepare for visibility tests.
-	void prepare(ThreadPool& threadpool, const FrustumComponent& frc);
+	void prepare(ThreadPool& threadpool, const ClustererPrepareInfo& inf);
 
 	void initTestResults(const GenericMemoryPoolAllocator<U8>& alloc, ClustererTestResult& rez) const;
 
 	/// Bin collision shape.
 	void bin(const CollisionShape& cs, const Aabb& csBox, ClustererTestResult& rez) const;
 
+	/// Bin a frustum.
+	void binPerspectiveFrustum(const PerspectiveFrustum& fr, const Aabb& csBox, ClustererTestResult& rez) const;
+
 	/// A value that will be used in shaders to calculate the cluster index.
 	F32 getShaderMagicValue() const
 	{
@@ -114,6 +177,12 @@ public:
 		return m_counts[0] * m_counts[1] * m_counts[2];
 	}
 
+	/// Call this after prepare()
+	void debugDraw(ClustererDebugDrawer& drawer) const;
+
+	/// Call this with a result.
+	void debugDrawResult(const ClustererTestResult& rez, ClustererDebugDrawer& drawer) const;
+
 private:
 	GenericMemoryPoolAllocator<U8> m_alloc;
 
@@ -128,33 +197,38 @@ private:
 	Plane* m_nearPlane; ///< In world space
 	Plane* m_farPlane; ///< In world space
 
-	/// Used to check if the frustum is changed and we need to update the planes.
-	const SceneNode* m_node = nullptr;
-
-	const FrustumComponent* m_frc = nullptr; ///< Cache it.
-
-	/// Timestamp for the same reason as m_frc.
-	Timestamp m_planesLSpaceTimestamp = 0;
+	/// Cluster boxes in view space.
+	DynamicArray<Aabb> m_clusterBoxes;
 
+	Mat4 m_viewMat = Mat4::getIdentity();
+	Mat4 m_projMat = Mat4::getIdentity();
+	Transform m_camTrf = Transform::getIdentity();
+	Vec4 m_unprojParams = Vec4(0.0);
 	F32 m_near = 0.0;
 	F32 m_far = 0.0;
 	F32 m_calcNearOpt = 0.0;
 	F32 m_shaderMagicVal = 0.0;
 
-	Array<Vec4, 8> m_disk; ///< To bin a sphere in tiles.
-
-	void initDisk();
-
 	F32 calcNear(U k) const;
 
 	U calcZ(F32 zVspace) const;
 
-	void binGeneric(
+	void setClusterBoxes(const Vec4& projParams, U begin, U end);
+
+	void binGeneric(const CollisionShape& cs, const Aabb& box, ClustererTestResult& rez) const;
+	void binGenericRecursive(
 		const CollisionShape& cs, U xBegin, U xEnd, U yBegin, U yEnd, U zBegin, U zEnd, ClustererTestResult& rez) const;
 
 	/// Special fast path for binning spheres.
 	void binSphere(const Sphere& s, const Aabb& aabb, ClustererTestResult& rez) const;
 
+	/// Quick reduction.
+	void quickReduction(const Aabb& aabb, const Mat4& mvp, U& xBegin, U& xEnd, U& yBegin, U& yEnd) const;
+
+	/// Box based reduction.
+	template<typename TFunc>
+	void boxReduction(U xBegin, U xEnd, U yBegin, U yEnd, U zBegin, U zEnd, ClustererTestResult& rez, TFunc func) const;
+
 	void computeSplitRange(const CollisionShape& cs, U& zBegin, U& zEnd) const;
 
 	void update(U32 threadId, PtrSize threadsCount, Bool frustumChanged);
@@ -167,8 +241,6 @@ private:
 
 	/// Call this when a shape is visible by all tiles.
 	void totallyInsideAllTiles(U zBegin, U zEnd, ClustererTestResult& rez) const;
-
-	void createConverHull();
 };
 /// @}
 

+ 38 - 79
src/anki/renderer/Dbg.cpp

@@ -16,7 +16,7 @@
 #include <anki/misc/ConfigSet.h>
 #include <anki/collision/ConvexHullShape.h>
 #include <anki/Ui.h> /// XXX
-#include <anki/scene/SoftwareRasterizer.h> /// XXX
+#include <anki/renderer/Clusterer.h> /// XXX
 
 namespace anki
 {
@@ -200,97 +200,56 @@ Error Dbg::run(RenderingContext& ctx)
 
 #if 0
 	{
-		m_drawer->setViewProjectionMatrix(Mat4::getIdentity());
-		m_drawer->setModelMatrix(Mat4::getIdentity());
-		Mat4 proj = camFrc.getProjectionMatrix();
-		Mat4 view = camFrc.getViewMatrix();
-
-		Array<Vec4, 12> ltriangle = {Vec4(0.0, 2.0, 2.0, 1.0),
-			Vec4(4.0, 2.0, 2.0, 1.0),
-			Vec4(0.0, 8.0, 2.0, 1.0),
-
-			Vec4(0.0, 8.0, 2.0, 1.0),
-			Vec4(4.0, 2.0, 2.0, 1.0),
-			Vec4(4.0, 8.0, 2.0, 1.0),
-
-			Vec4(0.9, 2.0, 1.9, 1.0),
-			Vec4(4.9, 2.0, 1.9, 1.0),
-			Vec4(0.9, 8.0, 1.9, 1.0),
+		Clusterer c;
+		c.init(getAllocator(), 16, 12, 30);
 
-			Vec4(0.9, 8.0, 1.9, 1.0),
-			Vec4(4.9, 2.0, 1.9, 1.0),
-			Vec4(4.9, 8.0, 1.9, 1.0)};
+		const FrustumComponent& frc = scene.findSceneNode("cam0").getComponent<FrustumComponent>();
+		const MoveComponent& movc = scene.findSceneNode("cam0").getComponent<MoveComponent>();
 
-		SoftwareRasterizer r;
-		r.init(getAllocator());
-		r.prepare(
-			view, proj, m_r->getTileCountXY().x(), m_r->getTileCountXY().y());
-		r.draw(&ltriangle[0][0], 12, sizeof(Vec4));
+		ClustererPrepareInfo pinf;
+		pinf.m_viewMat = frc.getViewMatrix();
+		pinf.m_projMat = frc.getProjectionMatrix();
+		pinf.m_camTrf = frc.getFrustum().getTransform();
+		c.prepare(m_r->getThreadPool(), pinf);
 
-		/*m_drawer->begin(PrimitiveTopology::TRIANGLES);
-		U count = 0;
-		for(U y = 0; y < m_r->getTileCountXY().y(); ++y)
+		class DD : public ClustererDebugDrawer
 		{
-			for(U x = 0; x < m_r->getTileCountXY().x(); ++x)
-			{
-				F32 d = r.m_zbuffer[y * m_r->getTileCountXY().x() + x].get()
-					/ F32(MAX_U32);
-
-				if(d < 1.0)
-				{
-					F32 zNear = camFrc.getFrustum().getNear();
-					F32 zFar = camFrc.getFrustum().getFar();
-					F32 ld =
-						(2.0 * zNear) / (zFar + zNear - d * (zFar - zNear));
-					m_drawer->setColor(Vec4(ld));
+		public:
+			DebugDrawer* m_d;
 
-					++count;
-					Vec2 min(F32(x) / m_r->getTileCountXY().x(),
-						F32(y) / m_r->getTileCountXY().y());
+			void operator()(const Vec3& lineA, const Vec3& lineB, const Vec3& color)
+			{
+				m_d->drawLine(lineA, lineB, color.xyz1());
+			}
+		};
 
-					Vec2 max(F32(x + 1) / m_r->getTileCountXY().x(),
-						F32(y + 1) / m_r->getTileCountXY().y());
+		DD dd;
+		dd.m_d = m_drawer;
 
-					min = min * 2.0 - 1.0;
-					max = max * 2.0 - 1.0;
+		CollisionDebugDrawer cd(m_drawer);
 
-					m_drawer->pushBackVertex(Vec3(min.x(), min.y(), 0.0));
-					m_drawer->pushBackVertex(Vec3(max.x(), min.y(), 0.0));
-					m_drawer->pushBackVertex(Vec3(min.x(), max.y(), 0.0));
+		Sphere s(Vec4(1.0, 0.1, -1.2, 0.0), 1.2);
+		PerspectiveFrustum fr(toRad(25.), toRad(35.), 0.1, 5.);
+		fr.transform(Transform(Vec4(0., 1., 0., 0.), Mat3x4::getIdentity(), 1.0));
 
-					m_drawer->pushBackVertex(Vec3(min.x(), max.y(), 0.0));
-					m_drawer->pushBackVertex(Vec3(max.x(), min.y(), 0.0));
-					m_drawer->pushBackVertex(Vec3(max.x(), max.y(), 0.0));
-				}
-			}
-		}
-		m_drawer->end();*/
+		m_drawer->setModelMatrix(Mat4(movc.getWorldTransform()));
+		// c.debugDraw(dd);
 
-		m_drawer->setViewProjectionMatrix(camFrc.getViewProjectionMatrix());
-		Vec3 offset(0.0, 0.0, 0.0);
-		m_drawer->setColor(Vec4(0.5));
-		m_drawer->begin(PrimitiveTopology::TRIANGLES);
-		for(U i = 0; i < ltriangle.getSize() / 3; ++i)
+		if(frc.getFrustum().insideFrustum(s))
 		{
-			m_drawer->pushBackVertex(ltriangle[i * 3 + 0].xyz());
-			m_drawer->pushBackVertex(ltriangle[i * 3 + 1].xyz());
-			m_drawer->pushBackVertex(ltriangle[i * 3 + 2].xyz());
+			ClustererTestResult rez;
+			c.initTestResults(getAllocator(), rez);
+			Aabb sbox;
+			s.computeAabb(sbox);
+			//c.binPerspectiveFrustum(fr, sbox, rez);
+			c.bin(s, sbox, rez);
+
+			c.debugDrawResult(rez, dd);
 		}
-		m_drawer->end();
 
-		SceneNode& node = scene.findSceneNode("Lamp");
-		SpatialComponent& spc = node.getComponent<SpatialComponent>();
-		Aabb nodeAabb = spc.getAabb();
-
-		Bool inside =
-			r.visibilityTest(spc.getSpatialCollisionShape(), nodeAabb);
-
-		if(inside)
-		{
-			m_drawer->setColor(Vec4(1.0, 0.0, 0.0, 1.0));
-			CollisionDebugDrawer cd(m_drawer);
-			nodeAabb.accept(cd);
-		}
+		m_drawer->setColor(Vec4(1.0, 1.0, 0.0, 1.0));
+		frc.getFrustum().accept(cd);
+		s.accept(cd);
 	}
 #endif
 

+ 2 - 2
src/anki/renderer/DebugDrawer.cpp

@@ -380,10 +380,10 @@ void CollisionDebugDrawer::visit(const Frustum& f)
 {
 	switch(f.getType())
 	{
-	case Frustum::Type::ORTHOGRAPHIC:
+	case FrustumType::ORTHOGRAPHIC:
 		visit(static_cast<const OrthographicFrustum&>(f).getObb());
 		break;
-	case Frustum::Type::PERSPECTIVE:
+	case FrustumType::PERSPECTIVE:
 	{
 		const PerspectiveFrustum& pf = static_cast<const PerspectiveFrustum&>(f);
 

+ 33 - 14
src/anki/renderer/LightBin.cpp

@@ -373,7 +373,11 @@ Error LightBin::bin(FrustumComponent& frc,
 	ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
 
 	// Prepare the clusterer
-	m_clusterer.prepare(*m_threadPool, frc);
+	ClustererPrepareInfo pinf;
+	pinf.m_viewMat = frc.getViewMatrix();
+	pinf.m_projMat = frc.getProjectionMatrix();
+	pinf.m_camTrf = frc.getFrustum().getTransform();
+	m_clusterer.prepare(*m_threadPool, pinf);
 
 	VisibilityTestResults& vi = frc.getVisibilityTestResults();
 
@@ -552,7 +556,7 @@ void LightBin::binLights(U32 threadId, PtrSize threadsCount, LightBinContext& ct
 				const FrustumComponent* frc = snode.tryGetComponent<FrustumComponent>();
 
 				I pos = writeSpotLight(light, move, frc, cammove, camfrc, ctx);
-				binLight(sp, pos, 1, ctx, testResult);
+				binLight(sp, light, pos, 1, ctx, testResult);
 			}
 			else if(j >= ctx.m_vDecals.getSize())
 			{
@@ -564,7 +568,7 @@ void LightBin::binLights(U32 threadId, PtrSize threadsCount, LightBinContext& ct
 				SpatialComponent& sp = snode.getComponent<SpatialComponent>();
 
 				I pos = writePointLight(light, move, camfrc, ctx);
-				binLight(sp, pos, 0, ctx, testResult);
+				binLight(sp, light, pos, 0, ctx, testResult);
 			}
 			else
 			{
@@ -737,18 +741,33 @@ I LightBin::writeSpotLight(const LightComponent& lightc,
 	return i;
 }
 
-void LightBin::binLight(SpatialComponent& sp, U pos, U lightType, LightBinContext& ctx, ClustererTestResult& testResult)
+void LightBin::binLight(const SpatialComponent& sp,
+	const LightComponent& lightc,
+	U pos,
+	U lightType,
+	LightBinContext& ctx,
+	ClustererTestResult& testResult) const
 {
-	m_clusterer.bin(sp.getSpatialCollisionShape(), sp.getAabb(), testResult);
+	if(lightc.getLightComponentType() == LightComponentType::SPOT)
+	{
+		const FrustumComponent& frc = lightc.getSceneNode().getComponent<FrustumComponent>();
+		ANKI_ASSERT(frc.getFrustum().getType() == FrustumType::PERSPECTIVE);
+		m_clusterer.binPerspectiveFrustum(
+			static_cast<const PerspectiveFrustum&>(frc.getFrustum()), sp.getAabb(), testResult);
+	}
+	else
+	{
+		m_clusterer.bin(sp.getSpatialCollisionShape(), sp.getAabb(), testResult);
+	}
 
 	// Bin to the correct tiles
 	auto it = testResult.getClustersBegin();
 	auto end = testResult.getClustersEnd();
 	for(; it != end; ++it)
 	{
-		U x = (*it)[0];
-		U y = (*it)[1];
-		U z = (*it)[2];
+		U x = (*it).x();
+		U y = (*it).y();
+		U z = (*it).z();
 
 		U i = m_clusterer.getClusterCountX() * (z * m_clusterer.getClusterCountY() + y) + x;
 
@@ -792,9 +811,9 @@ void LightBin::writeAndBinProbe(
 	auto end = testResult.getClustersEnd();
 	for(; it != end; ++it)
 	{
-		U x = (*it)[0];
-		U y = (*it)[1];
-		U z = (*it)[2];
+		U x = (*it).x();
+		U y = (*it).y();
+		U z = (*it).z();
 
 		U i = m_clusterer.getClusterCountX() * (z * m_clusterer.getClusterCountY() + y) + x;
 
@@ -857,9 +876,9 @@ void LightBin::writeAndBinDecal(
 	auto end = testResult.getClustersEnd();
 	for(; it != end; ++it)
 	{
-		U x = (*it)[0];
-		U y = (*it)[1];
-		U z = (*it)[2];
+		U x = (*it).x();
+		U y = (*it).y();
+		U z = (*it).z();
 
 		U i = m_clusterer.getClusterCountX() * (z * m_clusterer.getClusterCountY() + y) + x;
 

+ 6 - 1
src/anki/renderer/LightBin.h

@@ -72,7 +72,12 @@ private:
 		const FrustumComponent& camFrc,
 		LightBinContext& ctx);
 
-	void binLight(SpatialComponent& sp, U pos, U lightType, LightBinContext& ctx, ClustererTestResult& testResult);
+	void binLight(const SpatialComponent& sp,
+		const LightComponent& lightc,
+		U pos,
+		U lightType,
+		LightBinContext& ctx,
+		ClustererTestResult& testResult) const;
 
 	void writeAndBinProbe(
 		const FrustumComponent& camFrc, const SceneNode& node, LightBinContext& ctx, ClustererTestResult& testResult);

+ 1 - 1
src/anki/renderer/Renderer.cpp

@@ -188,7 +188,7 @@ Error Renderer::render(RenderingContext& ctx)
 	FrustumComponent& frc = *ctx.m_frustumComponent;
 	CommandBufferPtr& cmdb = ctx.m_commandBuffer;
 
-	ANKI_ASSERT(frc.getFrustum().getType() == Frustum::Type::PERSPECTIVE);
+	ANKI_ASSERT(frc.getFrustum().getType() == FrustumType::PERSPECTIVE);
 
 	// Check if resources got loaded
 	if(m_prevLoadRequestCount != m_resources->getLoadingRequestCount()

+ 2 - 2
src/anki/renderer/Sm.cpp

@@ -341,7 +341,7 @@ void Sm::prepareBuildCommandBuffers(RenderingContext& ctx)
 	{
 		SceneNode* node = (*it).m_node;
 		LightComponent& light = node->getComponent<LightComponent>();
-		ANKI_ASSERT(light.getLightType() == LightComponent::LightType::POINT);
+		ANKI_ASSERT(light.getLightComponentType() == LightComponentType::POINT);
 
 		if(light.getShadowEnabled())
 		{
@@ -361,7 +361,7 @@ void Sm::prepareBuildCommandBuffers(RenderingContext& ctx)
 	{
 		SceneNode* node = (*it).m_node;
 		LightComponent& light = node->getComponent<LightComponent>();
-		ANKI_ASSERT(light.getLightType() == LightComponent::LightType::SPOT);
+		ANKI_ASSERT(light.getLightComponentType() == LightComponentType::SPOT);
 
 		if(light.getShadowEnabled())
 		{

+ 2 - 17
src/anki/scene/FrustumComponent.cpp

@@ -61,24 +61,9 @@ Error FrustumComponent::update(SceneNode& node, F32, F32, Bool& updated)
 
 void FrustumComponent::computeProjectionParams()
 {
-	const Mat4& m = m_pm;
-
-	if(m_frustum->getType() == Frustum::Type::PERSPECTIVE)
+	if(m_frustum->getType() == FrustumType::PERSPECTIVE)
 	{
-		// First, z' = (m * Pv) / 2 + 0.5 where Pv is the view space position.
-		// Solving that for Pv.z we get
-		// Pv.z = A / (z' + B)
-		// where A = (-m23 / 2) and B = (m22/2 - 0.5)
-		// so we save the A and B in the projection params vector
-		m_projParams.z() = -m(2, 3) * 0.5;
-		m_projParams.w() = m(2, 2) * 0.5 - 0.5;
-
-		// Using the same logic the Pv.x = x' * w / m00
-		// so Pv.x = x' * Pv.z * (-1 / m00)
-		m_projParams.x() = -1.0 / m(0, 0);
-
-		// Same for y
-		m_projParams.y() = -1.0 / m(1, 1);
+		m_projParams = m_pm.extractPerspectiveUnprojectionParams();
 	}
 	else
 	{

+ 3 - 3
src/anki/scene/Light.cpp

@@ -53,7 +53,7 @@ Light::~Light()
 {
 }
 
-Error Light::init(LightComponent::LightType type, CollisionShape* shape)
+Error Light::init(LightComponentType type, CollisionShape* shape)
 {
 	SceneComponent* comp;
 
@@ -157,7 +157,7 @@ PointLight::~PointLight()
 
 Error PointLight::init()
 {
-	return Light::init(LightComponent::LightType::POINT, &m_sphereW);
+	return Light::init(LightComponentType::POINT, &m_sphereW);
 }
 
 void PointLight::onMoveUpdate(MoveComponent& move)
@@ -247,7 +247,7 @@ SpotLight::SpotLight(SceneGraph* scene, CString name)
 
 Error SpotLight::init()
 {
-	ANKI_CHECK(Light::init(LightComponent::LightType::SPOT, &m_frustum));
+	ANKI_CHECK(Light::init(LightComponentType::SPOT, &m_frustum));
 
 	FrustumComponent* fr = getSceneAllocator().newInstance<FrustumComponent>(this, &m_frustum);
 	fr->setEnabledVisibilityTests(FrustumComponentVisibilityTestFlag::NONE);

+ 1 - 1
src/anki/scene/Light.h

@@ -27,7 +27,7 @@ public:
 
 	~Light();
 
-	ANKI_USE_RESULT Error init(LightComponent::LightType type, CollisionShape* shape);
+	ANKI_USE_RESULT Error init(LightComponentType type, CollisionShape* shape);
 
 	ANKI_USE_RESULT Error loadLensFlare(const CString& filename);
 

+ 1 - 1
src/anki/scene/LightComponent.cpp

@@ -8,7 +8,7 @@
 namespace anki
 {
 
-LightComponent::LightComponent(SceneNode* node, LightType type)
+LightComponent::LightComponent(SceneNode* node, LightComponentType type)
 	: SceneComponent(CLASS_TYPE, node)
 	, m_type(type)
 {

+ 10 - 10
src/anki/scene/LightComponent.h

@@ -14,6 +14,13 @@ namespace anki
 /// @addtogroup scene
 /// @{
 
+enum class LightComponentType : U8 // Kind of light a LightComponent represents; returned by getLightComponentType().
+{
+	POINT, ///< Point light.
+	SPOT, ///< Spot light.
+	COUNT ///< Presumably the number of light types rather than a real type; confirm.
+};
+
 /// Light component. It's a dummy component used to identify lights
 class LightComponent : public SceneComponent
 {
@@ -24,16 +31,9 @@ public:
 	/// WARNING: If you change here update the shaders.
 	static constexpr F32 FRUSTUM_NEAR_PLANE = 0.1 / 4.0;
 
-	enum class LightType : U8
-	{
-		POINT,
-		SPOT,
-		COUNT
-	};
-
-	LightComponent(SceneNode* node, LightType type);
+	LightComponent(SceneNode* node, LightComponentType type);
 
-	LightType getLightType() const
+	LightComponentType getLightComponentType() const
 	{
 		return m_type;
 	}
@@ -138,7 +138,7 @@ public:
 	ANKI_USE_RESULT Error update(SceneNode&, F32, F32, Bool& updated) override;
 
 private:
-	LightType m_type;
+	LightComponentType m_type;
 	Vec4 m_diffColor = Vec4(0.5);
 	Vec4 m_specColor = Vec4(0.5);
 	union

+ 1 - 1
src/anki/scene/SoftwareRasterizer.cpp

@@ -41,7 +41,7 @@ void SoftwareRasterizer::clipTriangle(const Vec4* inVerts, Vec4* outVerts, U& ou
 {
 	ANKI_ASSERT(inVerts && outVerts);
 
-	const Plane& plane = m_planesL[Frustum::PlaneType::NEAR];
+	const Plane& plane = m_planesL[FrustumPlaneType::NEAR];
 	F32 clipZ = -plane.getOffset() - EPSILON;
 	ANKI_ASSERT(clipZ < 0.0);
 

+ 3 - 3
src/anki/scene/Visibility.cpp

@@ -382,12 +382,12 @@ void VisibilityTestTask::test(ThreadHive& hive)
 		if(lc && wantsLightComponents)
 		{
 			VisibilityGroupType gt;
-			switch(lc->getLightType())
+			switch(lc->getLightComponentType())
 			{
-			case LightComponent::LightType::POINT:
+			case LightComponentType::POINT:
 				gt = VisibilityGroupType::LIGHTS_POINT;
 				break;
-			case LightComponent::LightType::SPOT:
+			case LightComponentType::SPOT:
 				gt = VisibilityGroupType::LIGHTS_SPOT;
 				break;
 			default:

+ 1 - 1
src/anki/util/Logger.h

@@ -18,7 +18,7 @@ class File;
 /// @addtogroup util_private
 /// @{
 
-/// The logger singleton class. The logger cannot print errors or throw exceptions, it has to recover somehow. Its
+/// The logger singleton class. The logger cannot print errors or throw exceptions, it has to recover somehow. It's
 /// thread safe.
 /// To add a new signal:
 /// @code logger.addMessageHandler((void*)obj, &function) @endcode

+ 92 - 0
tests/renderer/Clusterer.cpp

@@ -0,0 +1,92 @@
+// Copyright (C) 2009-2016, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <tests/framework/Framework.h>
+#include <anki/renderer/Clusterer.h>
+#include <anki/Collision.h>
+#include <anki/util/ThreadPool.h>
+#include "anki/util/HighRezTimer.h"
+
+namespace anki
+{
+
+ANKI_TEST(Renderer, Clusterer) // Bins a fixed set of generated spheres many times, checks each hits a cluster, and prints throughput.
+{
+	const U CLUSTER_COUNT_X = 32;
+	const U CLUSTER_COUNT_Y = 24;
+	const U CLUSTER_COUNT_Z = 32;
+	const U ITERATION_COUNT = 32; // How many times the whole sphere set is binned.
+	const U SPHERE_COUNT = 1024;
+	const F32 SPHERE_MAX_RADIUS = 1000.0;
+	const F32 E = 0.01; // Margin that keeps generated NDC/depth values away from the exact limits.
+
+	HeapAllocator<U8> alloc(allocAligned, nullptr);
+
+	Clusterer c;
+	c.init(alloc, CLUSTER_COUNT_X, CLUSTER_COUNT_Y, CLUSTER_COUNT_Z);
+
+	PerspectiveFrustum fr(toRad(70.0), toRad(60.0), 0.1, 1000.0);
+	Mat4 projMat = fr.calculateProjectionMatrix();
+	Vec4 unprojParams = projMat.extractPerspectiveUnprojectionParams(); // Used below to place sphere centers from NDC + depth.
+
+	ThreadPool threadpool(4);
+
+	// Gen spheres
+	DynamicArrayAuto<Sphere> spheres(alloc);
+	spheres.create(SPHERE_COUNT);
+	DynamicArrayAuto<Aabb> sphereBoxes(alloc);
+	sphereBoxes.create(SPHERE_COUNT);
+	for(U i = 0; i < SPHERE_COUNT; ++i)
+	{
+		Vec2 ndc;
+		ndc.x() = clamp((i % 64) / 64.0f, E, 1.0f - E) * 2.0f - 1.0f; // Deterministic value in [-1 + 2E, 1 - 2E].
+		ndc.y() = ndc.x(); // Centers lie on the x == y diagonal in NDC.
+		F32 depth = clamp((i % 128) / 128.0f, E, 1.0f - E); // Deterministic depth in [E, 1 - E].
+
+		F32 z = unprojParams.z() / (unprojParams.w() + depth); // Depth converted to a Z value with the unprojection params.
+		Vec2 xy = ndc.xy() * unprojParams.xy() * z;
+		Vec4 sphereC(xy, z, 0.0);
+
+		F32 radius = max((i % 64) / 64.0f, 0.1f) * SPHERE_MAX_RADIUS; // At least 10% of SPHERE_MAX_RADIUS.
+
+		spheres[i] = Sphere(sphereC, radius);
+		spheres[i].computeAabb(sphereBoxes[i]);
+	}
+
+	// Bin spheres
+	HighRezTimer timer;
+	timer.start();
+	U clusterBinCount = 0; // Sum of getClusterCount() over every bin() call.
+	for(U i = 0; i < ITERATION_COUNT; ++i)
+	{
+		Transform camTrf(Vec4(0.1, 0.1, 0.1, 0.0), Mat3x4::getIdentity(), 1.0);
+
+		ClustererPrepareInfo pinf;
+		pinf.m_viewMat = Mat4(camTrf).getInverse(); // View matrix is the inverse of the camera transform.
+		pinf.m_projMat = projMat;
+		pinf.m_camTrf = camTrf;
+
+		c.prepare(threadpool, pinf);
+		ClustererTestResult rez;
+		c.initTestResults(alloc, rez);
+
+		for(U s = 0; s < SPHERE_COUNT; ++s)
+		{
+			c.bin(spheres[s], sphereBoxes[s], rez);
+			ANKI_TEST_EXPECT_GT(rez.getClusterCount(), 0); // Every generated sphere is expected to land in at least one cluster.
+			clusterBinCount += rez.getClusterCount();
+		}
+	}
+	timer.stop();
+	F64 ms = timer.getElapsedTime() * 1000.0; // Elapsed time scaled by 1000 for the per-ms figure below.
+	printf("Cluster count: %lu.\n"
+		   "Binned %f spheres/ms.\n"
+		   "Avg clusters per sphere %f\n",
+		static_cast<unsigned long>(c.getClusterCount()), // Cast: the return type is declared elsewhere and need not be unsigned long everywhere.
+		F64(SPHERE_COUNT) * F64(ITERATION_COUNT) / ms,
+		clusterBinCount / F32(ITERATION_COUNT * SPHERE_COUNT));
+}
+
+} // end namespace anki