Browse Source

Try to optimize a bit

Panagiotis Christopoulos Charitos 7 years ago
parent
commit
cfc298309f

+ 4 - 6
CMakeLists.txt

@@ -177,12 +177,10 @@ if(NOT MSVC)
 		set(CXX_FLAGS "${CXX_FLAGS} -static-libstdc++ ")
 	endif()
 
-	if(ANKI_SIMD)
-		if(LINUX OR MACOS OR WINDOWS)
-			set(COMPILER_FLAGS "${COMPILER_FLAGS} -msse4 ")
-		else()
-			set(COMPILER_FLAGS "${COMPILER_FLAGS} -mfpu=neon ")
-		endif()
+	if(LINUX OR MACOS OR WINDOWS)
+		set(COMPILER_FLAGS "${COMPILER_FLAGS} -msse4 ")
+	else()
+		set(COMPILER_FLAGS "${COMPILER_FLAGS} -mfpu=neon ")
 	endif()
 
 	if(ANKI_LTO)

+ 6 - 0
src/anki/math/Vec4.h

@@ -214,6 +214,12 @@ void TVec4<F32>::Base::normalize();
 template<>
 TVec4<F32> TVec4<F32>::cross(const TVec4<F32>& b) const;
 
+template<>
+TVec4<F32> TVec4<F32>::Base::min(const TVec4<F32>& b) const;
+
+template<>
+TVec4<F32> TVec4<F32>::Base::max(const TVec4<F32>& b) const;
+
 template<>
 TVec4<F32> TVec4<F32>::Base::getAbs() const;
 

+ 14 - 0
src/anki/math/Vec4.inl.h

@@ -138,6 +138,20 @@ inline TVec4<F32> TVec4<F32>::cross(const TVec4<F32>& b) const
 	return TVec4<F32>(_mm_sub_ps(tmp0, tmp1));
 }
 
+template<>
+inline TVec4<F32> TVec4<F32>::Base::min(const TVec4<F32>& b) const
+{
+	__m128 out = _mm_min_ps(m_simd, b.m_simd);
+	return TVec4<F32>(out);
+}
+
+template<>
+inline TVec4<F32> TVec4<F32>::Base::max(const TVec4<F32>& b) const
+{
+	__m128 out = _mm_max_ps(m_simd, b.m_simd);
+	return TVec4<F32>(out);
+}
+
 template<>
 inline F32 TVec4<F32>::Base::dot(const TVec4<F32>& b) const
 {

+ 4 - 2
src/anki/scene/Octree.cpp

@@ -277,6 +277,7 @@ void Octree::removeInternal(OctreePlaceable& placeable)
 
 			// Iterate the placeables of the leaf
 			Bool found = false;
+			(void)found;
 			for(PlaceableNode& placeableNode : leafNode.m_leaf->m_placeables)
 			{
 				if(placeableNode.m_placeable == &placeable)
@@ -309,7 +310,7 @@ void Octree::gatherVisibleRecursive(const Frustum& frustum,
 	OctreeNodeVisibilityTestCallback testCallback,
 	void* testCallbackUserData,
 	Leaf* leaf,
-	DynamicArrayAuto<OctreePlaceable*>& out)
+	DynamicArrayAuto<void*>& out)
 {
 	ANKI_ASSERT(leaf);
 
@@ -318,7 +319,8 @@ void Octree::gatherVisibleRecursive(const Frustum& frustum,
 	{
 		if(!placeableNode.m_placeable->alreadyVisited(testId))
 		{
-			out.emplaceBack(placeableNode.m_placeable);
+			ANKI_ASSERT(placeableNode.m_placeable->m_userData);
+			out.emplaceBack(placeableNode.m_placeable->m_userData);
 		}
 	}
 

+ 2 - 2
src/anki/scene/Octree.h

@@ -67,7 +67,7 @@ public:
 		U32 testId,
 		OctreeNodeVisibilityTestCallback testCallback,
 		void* testCallbackUserData,
-		DynamicArrayAuto<OctreePlaceable*>& out)
+		DynamicArrayAuto<void*>& out)
 	{
 		gatherVisibleRecursive(frustum, testId, testCallback, testCallbackUserData, m_rootLeaf, out);
 	}
@@ -226,7 +226,7 @@ private:
 		OctreeNodeVisibilityTestCallback testCallback,
 		void* testCallbackUserData,
 		Leaf* leaf,
-		DynamicArrayAuto<OctreePlaceable*>& out);
+		DynamicArrayAuto<void*>& out);
 
 	/// Remove a leaf.
 	void cleanupRecursive(Leaf* leaf, Bool& canDeleteLeafUponReturn);

+ 38 - 15
src/anki/scene/SoftwareRasterizer.cpp

@@ -321,36 +321,59 @@ Bool SoftwareRasterizer::visibilityTestInternal(const CollisionShape& cs, const
 	boxPoints[6] = Vec4(maxv.x(), minv.y(), minv.z(), 1.0f);
 	boxPoints[7] = Vec4(maxv.x(), maxv.y(), minv.z(), 1.0f);
 
-	// Compute bounding box
-	const Vec2 windowSize(m_width, m_height);
-
-	Vec2 bboxMin(MAX_F32), bboxMax(MIN_F32);
-	F32 minZ = MAX_F32;
+	// Transform points
 	for(Vec4& p : boxPoints)
 	{
 		p = m_mvp * p;
+	}
+
+	// Check if a point touches the near plane
+	for(const Vec4& p : boxPoints)
+	{
 		if(p.w() <= 0.0f)
 		{
 			// Don't bother clipping. Just mark it as visible.
 			return true;
 		}
+	}
 
-		p = p.perspectiveDivide();
+	// Compute the min and max bounds
+	Vec4 bboxMin(MAX_F32);
+	Vec4 bboxMax(MIN_F32);
+	for(Vec4& p : boxPoints)
+	{
+		// Perspective divide
+		p /= p.w();
 
-		for(U i = 0; i < 2; ++i)
-		{
-			F32 a = (p[i] / 2.0f + 0.5f) * windowSize[i];
+		// To [0, 1]
+		p *= Vec4(0.5f, 0.5f, 1.0f, 1.0f);
+		p += Vec4(0.5f, 0.5f, 0.0f, 0.0f);
 
-			bboxMin[i] = min(bboxMin[i], floorf(a));
-			bboxMin[i] = clamp(bboxMin[i], 0.0f, windowSize[i]);
+		// To [0, m_width|m_height]
+		p *= Vec4(m_width, m_height, 1.0f, 1.0f);
 
-			bboxMax[i] = max(bboxMax[i], ceilf(a));
-			bboxMax[i] = clamp(bboxMax[i], 0.0f, windowSize[i]);
-		}
+		// Min
+		bboxMin = bboxMin.min(p);
 
-		minZ = min(minZ, p.z());
+		// Max
+		bboxMax = bboxMax.max(p);
 	}
 
+	// Fix the bounds
+	bboxMin.x() = floorf(bboxMin.x());
+	bboxMin.x() = clamp(bboxMin.x(), 0.0f, F32(m_width));
+
+	bboxMax.x() = ceilf(bboxMax.x());
+	bboxMax.x() = clamp(bboxMax.x(), 0.0f, F32(m_width));
+
+	bboxMin.y() = floorf(bboxMin.y());
+	bboxMin.y() = clamp(bboxMin.y(), 0.0f, F32(m_height));
+
+	bboxMax.y() = ceilf(bboxMax.y());
+	bboxMax.y() = clamp(bboxMax.y(), 0.0f, F32(m_height));
+
+	// Loop the tiles
+	F32 minZ = bboxMin.z();
 	for(U y = bboxMin.y(); y < bboxMax.y(); y += 1.0f)
 	{
 		for(U x = bboxMin.x(); x < bboxMax.x(); x += 1.0f)

+ 17 - 15
src/anki/scene/Visibility.cpp

@@ -131,7 +131,7 @@ void VisibilityContext::submitNewWork(FrustumComponent& frc, RenderQueue& rqueue
 		auto& test = tests[i];
 		test.m_visCtx = this;
 		test.m_frc = &frc;
-		test.m_octreePlaceables = &gather->m_octreePlaceables;
+		test.m_visibleSpatialComponents = &gather->m_visibleSpatialComponents;
 		test.m_taskIdx = i;
 		test.m_taskCount = testCount;
 		test.m_r = r;
@@ -167,7 +167,7 @@ void VisibilityContext::submitNewWork(FrustumComponent& frc, RenderQueue& rqueue
 
 void GatherVisibleTrianglesTask::gather()
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_GATHER_TRIANGLES);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_GATHER_TRIANGLES);
 
 	auto alloc = m_visCtx->m_scene->getFrameAllocator();
 	m_verts.create(alloc, TRIANGLES_INITIAL_SIZE);
@@ -201,7 +201,7 @@ void GatherVisibleTrianglesTask::gather()
 
 void RasterizeTrianglesTask::rasterize()
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_RASTERIZE);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_RASTERIZE);
 
 	const U totalVertCount = m_gatherTask->m_vertCount;
 
@@ -214,7 +214,7 @@ void RasterizeTrianglesTask::rasterize()
 
 void GatherVisiblesFromOctreeTask::gather()
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_OCTREE);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_OCTREE);
 
 	U testIdx = m_visCtx->m_testsCount.fetchAdd(1);
 
@@ -222,30 +222,32 @@ void GatherVisiblesFromOctreeTask::gather()
 	auto testCallback = [](void* rasterizer, const Aabb& box) -> Bool {
 		ANKI_ASSERT(rasterizer);
 		SoftwareRasterizer* r = static_cast<SoftwareRasterizer*>(rasterizer);
-		return r->visibilityTest(box, box);
+		Bool inside = r->visibilityTest(box, box);
+		return inside;
 	};
 
 	// Test
-	DynamicArrayAuto<OctreePlaceable*> arr(m_visCtx->m_scene->getFrameAllocator());
-	OctreeNodeVisibilityTestCallback cb = (m_rasterizer) ? testCallback : nullptr;
+	DynamicArrayAuto<void*> arr(m_visCtx->m_scene->getFrameAllocator());
+	OctreeNodeVisibilityTestCallback cb =
+		(m_rasterizer) ? testCallback : static_cast<OctreeNodeVisibilityTestCallback>(nullptr);
 	m_visCtx->m_scene->getOctree().gatherVisible(m_frc->getFrustum(), testIdx, cb, m_rasterizer, arr);
 
 	// Store results
 	if(arr.getSize() > 0)
 	{
-		OctreePlaceable** data;
+		void** data;
 		PtrSize size;
 		PtrSize storage;
 		arr.moveAndReset(data, size, storage);
 
 		ANKI_ASSERT(data && size);
-		m_octreePlaceables = WeakArray<OctreePlaceable*>(data, size);
+		m_visibleSpatialComponents = WeakArray<void*>(data, size);
 	}
 }
 
 void VisibilityTestTask::test(ThreadHive& hive)
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_TEST);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_TEST);
 
 	FrustumComponent& testedFrc = *m_frc;
 	ANKI_ASSERT(testedFrc.anyVisibilityTestEnabled());
@@ -280,11 +282,11 @@ void VisibilityTestTask::test(ThreadHive& hive)
 
 	// Choose the test range and a few other things
 	PtrSize start, end;
-	ThreadPoolTask::choseStartEnd(m_taskIdx, m_taskCount, m_octreePlaceables->getSize(), start, end);
+	ThreadPoolTask::choseStartEnd(m_taskIdx, m_taskCount, m_visibleSpatialComponents->getSize(), start, end);
 	for(U i = start; i < end; ++i)
 	{
-		OctreePlaceable* placeable = (*m_octreePlaceables)[i];
-		SpatialComponent* spatialC = static_cast<SpatialComponent*>(placeable->m_userData);
+		void* spatialCPtr = (*m_visibleSpatialComponents)[i];
+		SpatialComponent* spatialC = static_cast<SpatialComponent*>(spatialCPtr);
 		SceneNode& node = spatialC->getSceneNode();
 
 		// Skip if it is the same
@@ -533,7 +535,7 @@ void VisibilityTestTask::test(ThreadHive& hive)
 
 void CombineResultsTask::combine()
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_COMBINE_RESULTS);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_COMBINE_RESULTS);
 
 	auto alloc = m_visCtx->m_scene->getFrameAllocator();
 
@@ -716,7 +718,7 @@ void CombineResultsTask::combineQueueElements(SceneFrameAllocator<U8>& alloc,
 
 void doVisibilityTests(SceneNode& fsn, SceneGraph& scene, RenderQueue& rqueue)
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_TESTS);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_TESTS);
 
 	ThreadHive& hive = scene.getThreadHive();
 	scene.getSectorGroup().prepareForVisibilityTests();

+ 2 - 2
src/anki/scene/VisibilityInternal.h

@@ -185,7 +185,7 @@ public:
 	VisibilityContext* m_visCtx ANKI_DBG_NULLIFY;
 	FrustumComponent* m_frc ANKI_DBG_NULLIFY; ///< What to test against.
 	SoftwareRasterizer* m_rasterizer ANKI_DBG_NULLIFY;
-	WeakArray<OctreePlaceable*> m_octreePlaceables; ///< The results of the task.
+	WeakArray<void*> m_visibleSpatialComponents; ///< The results of the task.
 
 	/// Thread hive task.
 	static void callback(void* ud, U32 threadId, ThreadHive& hive)
@@ -230,7 +230,7 @@ class VisibilityTestTask
 public:
 	WeakPtr<VisibilityContext> m_visCtx;
 	WeakPtr<FrustumComponent> m_frc;
-	WeakArray<OctreePlaceable*>* m_octreePlaceables ANKI_DBG_NULLIFY;
+	WeakArray<void*>* m_visibleSpatialComponents ANKI_DBG_NULLIFY;
 	U32 m_taskIdx;
 	U32 m_taskCount;
 	RenderQueueView m_result; ///< Sub result. Will be combined later.

+ 4 - 3
tests/scene/Octree.cpp

@@ -35,6 +35,7 @@ ANKI_TEST(Scene, Octree)
 			if(mode == 0)
 			{
 				// Place
+				placeables[i].m_userData = &placeables[i];
 				octree.place(volume, &placeables[i]);
 				placed.push_back(i);
 			}
@@ -54,16 +55,16 @@ ANKI_TEST(Scene, Octree)
 					placeables[idx].reset();
 				}
 
-				DynamicArrayAuto<OctreePlaceable*> arr(alloc);
+				DynamicArrayAuto<void*> arr(alloc);
 				octree.gatherVisible(frustum, 0, nullptr, nullptr, arr);
 
 				ANKI_TEST_EXPECT_EQ(arr.getSize(), placed.size());
 				for(U32 idx : placed)
 				{
 					Bool found = false;
-					for(OctreePlaceable* placeable : arr)
+					for(void* placeable : arr)
 					{
-						if(&placeables[idx] == placeable)
+						if(&placeables[idx] == static_cast<OctreePlaceable*>(placeable))
 						{
 							found = true;
 							break;