Browse Source

SIMD the AABB plane test. Clusterer can bin 3x more spotlights

Panagiotis Christopoulos Charitos 9 years ago
parent
commit
33d8c51145

+ 17 - 2
src/anki/collision/Aabb.cpp

@@ -31,6 +31,14 @@ Aabb Aabb::getTransformed(const Transform& trf) const
 F32 Aabb::testPlane(const Plane& p) const
 F32 Aabb::testPlane(const Plane& p) const
 {
 {
 	const Aabb& aabb = *this;
 	const Aabb& aabb = *this;
+
+#if ANKI_SIMD == ANKI_SIMD_SSE
+	__m128 gezero = _mm_cmpge_ps(p.getNormal().getSimd(), _mm_setzero_ps());
+
+	Vec4 diagMin;
+	diagMin.getSimd() =
+		_mm_or_ps(_mm_and_ps(gezero, aabb.getMin().getSimd()), _mm_andnot_ps(gezero, aabb.getMax().getSimd()));
+#else
 	Vec4 diagMin(0.0), diagMax(0.0);
 	Vec4 diagMin(0.0), diagMax(0.0);
 	// set min/max values for x,y,z direction
 	// set min/max values for x,y,z direction
 	for(U i = 0; i < 3; i++)
 	for(U i = 0; i < 3; i++)
@@ -46,6 +54,7 @@ F32 Aabb::testPlane(const Plane& p) const
 			diagMax[i] = aabb.getMin()[i];
 			diagMax[i] = aabb.getMin()[i];
 		}
 		}
 	}
 	}
+#endif
 
 
 	// minimum on positive side of plane, box on positive side
 	// minimum on positive side of plane, box on positive side
 	F32 test = p.test(diagMin);
 	F32 test = p.test(diagMin);
@@ -54,15 +63,21 @@ F32 Aabb::testPlane(const Plane& p) const
 		return test;
 		return test;
 	}
 	}
 
 
+#if ANKI_SIMD == ANKI_SIMD_SSE
+	Vec4 diagMax;
+	diagMax.getSimd() =
+		_mm_or_ps(_mm_and_ps(gezero, aabb.getMax().getSimd()), _mm_andnot_ps(gezero, aabb.getMin().getSimd()));
+#endif
+
 	test = p.test(diagMax);
 	test = p.test(diagMax);
-	// min on non-positive side, max on non-negative side, intersection
 	if(test >= 0.0)
 	if(test >= 0.0)
 	{
 	{
+		// min on non-positive side, max on non-negative side, intersection
 		return 0.0;
 		return 0.0;
 	}
 	}
-	// max on negative side, box on negative side
 	else
 	else
 	{
 	{
+		// max on negative side, box on negative side
 		return test;
 		return test;
 	}
 	}
 }
 }

+ 3 - 0
src/anki/math/Vec4.h

@@ -215,6 +215,9 @@ TVec4<F32> TVec4<F32>::Base::getAbs() const;
 template<>
 template<>
 F32 TVec4<F32>::Base::getLengthSquared() const;
 F32 TVec4<F32>::Base::getLengthSquared() const;
 
 
+template<>
+TVec4<F32> TVec4<F32>::Base::operator-() const;
+
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 
 
 #error "TODO"
 #error "TODO"

+ 8 - 0
src/anki/math/Vec4.inl.h

@@ -175,6 +175,14 @@ inline F32 TVec4<F32>::Base::getLengthSquared() const
 	return o;
 	return o;
 }
 }
 
 
+template<> // Explicit specialisation of unary minus for the F32 4-component vector, written with SSE intrinsics.
+inline TVec4<F32> TVec4<F32>::Base::operator-() const
+{
+	TVec4<F32> o;
+	o.getSimd() = _mm_sub_ps(_mm_setzero_ps(), getSimd()); // Negate all four lanes in one instruction by computing (0 - v). NOTE(review): 0 - (+0.0) gives +0.0 rather than -0.0; confirm no caller depends on the sign of zero.
+	return o;
+}
+
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 #elif ANKI_SIMD == ANKI_SIMD_NEON
 
 
 #error "TODO"
 #error "TODO"

+ 5 - 5
src/anki/renderer/Dbg.cpp

@@ -235,21 +235,21 @@ Error Dbg::run(RenderingContext& ctx)
 		m_drawer->setModelMatrix(Mat4(movc.getWorldTransform()));
 		m_drawer->setModelMatrix(Mat4(movc.getWorldTransform()));
 		// c.debugDraw(dd);
 		// c.debugDraw(dd);
 
 
-		if(frc.getFrustum().insideFrustum(s))
+		if(frc.getFrustum().insideFrustum(fr))
 		{
 		{
 			ClustererTestResult rez;
 			ClustererTestResult rez;
 			c.initTestResults(getAllocator(), rez);
 			c.initTestResults(getAllocator(), rez);
 			Aabb sbox;
 			Aabb sbox;
-			s.computeAabb(sbox);
-			//c.binPerspectiveFrustum(fr, sbox, rez);
-			c.bin(s, sbox, rez);
+			fr.computeAabb(sbox);
+			c.binPerspectiveFrustum(fr, sbox, rez);
+			//c.bin(s, sbox, rez);
 
 
 			c.debugDrawResult(rez, dd);
 			c.debugDrawResult(rez, dd);
 		}
 		}
 
 
 		m_drawer->setColor(Vec4(1.0, 1.0, 0.0, 1.0));
 		m_drawer->setColor(Vec4(1.0, 1.0, 0.0, 1.0));
 		frc.getFrustum().accept(cd);
 		frc.getFrustum().accept(cd);
-		s.accept(cd);
+		fr.accept(cd);
 	}
 	}
 #endif
 #endif
 
 

+ 58 - 0
tests/renderer/Clusterer.cpp

@@ -21,6 +21,9 @@ ANKI_TEST(Renderer, Clusterer)
 	const U SPHERE_COUNT = 1024;
 	const U SPHERE_COUNT = 1024;
 	const F32 SPHERE_MAX_RADIUS = 1000.0;
 	const F32 SPHERE_MAX_RADIUS = 1000.0;
 	const F32 E = 0.01;
 	const F32 E = 0.01;
+	const U FRUSTUM_COUNT = 1024;
+	const F32 FRUSTUM_MAX_ANGLE = toRad(70.0);
+	const F32 FRUSTUM_MAX_DIST = 200.0;
 
 
 	HeapAllocator<U8> alloc(allocAligned, nullptr);
 	HeapAllocator<U8> alloc(allocAligned, nullptr);
 
 
@@ -87,6 +90,61 @@ ANKI_TEST(Renderer, Clusterer)
 		c.getClusterCount(),
 		c.getClusterCount(),
 		F64(SPHERE_COUNT) * F64(ITERATION_COUNT) / ms,
 		F64(SPHERE_COUNT) * F64(ITERATION_COUNT) / ms,
 		clusterBinCount / F32(ITERATION_COUNT * SPHERE_COUNT));
 		clusterBinCount / F32(ITERATION_COUNT * SPHERE_COUNT));
+
+	// Gen frustums
+	DynamicArrayAuto<PerspectiveFrustum> frs(alloc);
+	frs.create(FRUSTUM_COUNT);
+	DynamicArrayAuto<Aabb> frBoxes(alloc);
+	frBoxes.create(FRUSTUM_COUNT);
+	for(U i = 0; i < FRUSTUM_COUNT; ++i)
+	{
+		Vec2 ndc;
+		ndc.x() = clamp((i % 64) / 64.0f, E, 1.0f - E) * 2.0f - 1.0f;
+		ndc.y() = ndc.x();
+		F32 depth = clamp((i % 128) / 128.0f, E, 1.0f - E);
+
+		F32 z = unprojParams.z() / (unprojParams.w() + depth);
+		Vec2 xy = ndc.xy() * unprojParams.xy() * z;
+		Vec4 c(xy, z, 0.0);
+
+		F32 dist = max((i % 64) / 64.0f, 0.1f) * FRUSTUM_MAX_DIST;
+		F32 ang = max((i % 64) / 64.0f, 0.2f) * FRUSTUM_MAX_ANGLE;
+
+		frs[i] = PerspectiveFrustum(ang, ang, 0.1, dist);
+		frs[i].transform(Transform(c, Mat3x4::getIdentity(), 1.0));
+
+		frs[i].computeAabb(frBoxes[i]);
+	}
+
+	// Bin frustums
+	timer.start();
+	clusterBinCount = 0;
+	for(U i = 0; i < ITERATION_COUNT; ++i)
+	{
+		Transform camTrf(Vec4(0.1, 0.1, 0.1, 0.0), Mat3x4::getIdentity(), 1.0);
+
+		ClustererPrepareInfo pinf;
+		pinf.m_viewMat = Mat4(camTrf).getInverse();
+		pinf.m_projMat = projMat;
+		pinf.m_camTrf = camTrf;
+
+		c.prepare(threadpool, pinf);
+		ClustererTestResult rez;
+		c.initTestResults(alloc, rez);
+
+		for(U s = 0; s < FRUSTUM_COUNT; ++s)
+		{
+			c.binPerspectiveFrustum(frs[s], frBoxes[s], rez);
+			ANKI_TEST_EXPECT_GT(rez.getClusterCount(), 0);
+			clusterBinCount += rez.getClusterCount();
+		}
+	}
+	timer.stop();
+	ms = timer.getElapsedTime() * 1000.0;
+	printf("Binned %f frustums/ms.\n"
+		   "Avg clusters per frustum %f\n",
+		F64(FRUSTUM_COUNT) * F64(ITERATION_COUNT) / ms,
+		clusterBinCount / F32(ITERATION_COUNT * FRUSTUM_COUNT));
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki