Browse Source

Merge pull request #35 from godlikepanos/octree2

Change how visibility tests are done
Panagiotis Christopoulos Charitos 7 years ago
parent
commit
15baf58cd5
45 changed files with 1277 additions and 843 deletions
  1. 4 6
      CMakeLists.txt
  2. 18 0
      programs/DepthDownscale.ankiprog
  3. 2 1
      programs/SceneDebug.ankiprog
  4. 5 0
      sandbox/Main.cpp
  5. 1 0
      src/anki/Core.h
  6. 16 0
      src/anki/collision/Aabb.h
  7. 2 37
      src/anki/collision/Frustum.cpp
  8. 17 18
      src/anki/collision/Frustum.h
  9. 8 6
      src/anki/core/App.cpp
  10. 1 1
      src/anki/core/Config.cpp
  11. 99 4
      src/anki/core/Trace.h
  12. 1 0
      src/anki/gr/Common.h
  13. 14 0
      src/anki/math/Functions.h
  14. 6 0
      src/anki/math/Vec4.h
  15. 14 0
      src/anki/math/Vec4.inl.h
  16. 2 2
      src/anki/renderer/Common.h
  17. 42 150
      src/anki/renderer/DebugDrawer.cpp
  18. 49 69
      src/anki/renderer/DebugDrawer.h
  19. 53 8
      src/anki/renderer/DepthDownscale.cpp
  20. 22 1
      src/anki/renderer/DepthDownscale.h
  21. 1 1
      src/anki/renderer/Reflections.cpp
  22. 6 0
      src/anki/renderer/RenderQueue.h
  23. 11 0
      src/anki/renderer/Renderer.cpp
  24. 1 8
      src/anki/resource/ShaderProgramResource.cpp
  25. 226 12
      src/anki/scene/Octree.cpp
  26. 116 5
      src/anki/scene/Octree.h
  27. 10 1
      src/anki/scene/SceneGraph.cpp
  28. 12 1
      src/anki/scene/SceneGraph.h
  29. 59 16
      src/anki/scene/SoftwareRasterizer.cpp
  30. 5 0
      src/anki/scene/SoftwareRasterizer.h
  31. 180 188
      src/anki/scene/Visibility.cpp
  32. 0 21
      src/anki/scene/Visibility.h
  33. 84 67
      src/anki/scene/VisibilityInternal.h
  34. 5 1
      src/anki/scene/components/FrustumComponent.cpp
  35. 44 0
      src/anki/scene/components/FrustumComponent.h
  36. 13 4
      src/anki/scene/components/SpatialComponent.cpp
  37. 7 33
      src/anki/scene/components/SpatialComponent.h
  38. 7 24
      src/anki/util/Functions.h
  39. 41 105
      src/anki/util/ThreadHive.cpp
  40. 49 8
      src/anki/util/ThreadHive.h
  41. 1 1
      src/anki/util/ThreadPosix.cpp
  42. 0 26
      src/anki/util/Tracer.h
  43. 1 1
      tests/gr/StackGpuAllocator.cpp
  44. 5 4
      tests/scene/Octree.cpp
  45. 17 13
      tests/util/ThreadHive.cpp

+ 4 - 6
CMakeLists.txt

@@ -177,12 +177,10 @@ if(NOT MSVC)
 		set(CXX_FLAGS "${CXX_FLAGS} -static-libstdc++ ")
 	endif()
 
-	if(ANKI_SIMD)
-		if(LINUX OR MACOS OR WINDOWS)
-			set(COMPILER_FLAGS "${COMPILER_FLAGS} -msse4 ")
-		else()
-			set(COMPILER_FLAGS "${COMPILER_FLAGS} -mfpu=neon ")
-		endif()
+	if(LINUX OR MACOS OR WINDOWS)
+		set(COMPILER_FLAGS "${COMPILER_FLAGS} -msse4 ")
+	else()
+		set(COMPILER_FLAGS "${COMPILER_FLAGS} -mfpu=neon ")
 	endif()
 
 	if(ANKI_LTO)

+ 18 - 0
programs/DepthDownscale.ankiprog

@@ -8,6 +8,7 @@ http://www.anki3d.org/LICENSE
 	<mutators>
 		<mutator name="TYPE" values="0 1"/> <!-- 0: write to depth&color, 1: write to depth -->
 		<mutator name="SAMPLE_RESOLVE_TYPE" values="0 1 2"/> <!-- 0: average, 1: min, 2: max -->
+		<mutator name="COPY_TO_CLIENT" values="0 1"/>
 	</mutators>
 
 	<shaders>
@@ -25,6 +26,19 @@ http://www.anki3d.org/LICENSE
 #define MIN 1
 #define MAX 2
 
+#if COPY_TO_CLIENT
+struct PushConsts
+{
+	uvec2 textureSize;
+};
+ANKI_PUSH_CONSTANTS(PushConsts, u_regs);
+
+layout(std430, ANKI_SS_BINDING(0, 0)) writeonly buffer s1_
+{
+	float u_clientBuf[];
+};
+#endif
+
 layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_depthRt;
 
 layout(location = 0) in vec2 in_uv;
@@ -50,6 +64,10 @@ void main()
 #if TYPE == 0
 	gl_FragDepth = out_color;
 #endif
+
+#if COPY_TO_CLIENT
+	u_clientBuf[uint(gl_FragCoord.y) * u_regs.textureSize.x + uint(gl_FragCoord.x)] = out_color;
+#endif
 }
 			]]></source>
 		</shader>

+ 2 - 1
programs/SceneDebug.ankiprog

@@ -61,7 +61,8 @@ layout(ANKI_UBO_BINDING(1, 0), row_major) uniform u0_
 	vec4 u_color;
 };
 
-#if DITHERED_DEPTH_TEST == 1
+// NOTE: Don't eliminate the binding because it confuses the descriptor set creation
+#if DITHERED_DEPTH_TEST == 1 || 1
 layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_depthRt;
 #endif
 

+ 5 - 0
sandbox/Main.cpp

@@ -161,6 +161,11 @@ Error MyApp::userMainLoop(Bool& quit)
 		renderer.getDbg().switchDepthTestEnabled();
 	}
 
+	if(in.getKey(KeyCode::F12) == 1)
+	{
+		CoreTracerSingleton::get().m_enabled = !CoreTracerSingleton::get().m_enabled;
+	}
+
 #if !PLAYER
 	if(in.getKey(KeyCode::UP))
 		mover->rotateLocalX(ang);

+ 1 - 0
src/anki/Core.h

@@ -8,3 +8,4 @@
 #include <anki/core/App.h>
 #include <anki/core/Config.h>
 #include <anki/core/NativeWindow.h>
+#include <anki/core/Trace.h>

+ 16 - 0
src/anki/collision/Aabb.h

@@ -34,6 +34,12 @@ public:
 		ANKI_ASSERT(m_min.xyz() < m_max.xyz());
 	}
 
+	Aabb(const Vec3& min, const Vec3& max)
+		: Aabb(Vec4(min, 0.0f), Vec4(max, 0.0f))
+	{
+		ANKI_ASSERT(m_min.xyz() < m_max.xyz());
+	}
+
 	Aabb(const Aabb& b)
 		: Base(CollisionShapeType::AABB)
 	{
@@ -51,6 +57,11 @@ public:
 		m_min = x;
 	}
 
+	void setMin(const Vec3& x)
+	{
+		setMin(Vec4(x, 0.0f));
+	}
+
 	const Vec4& getMax() const
 	{
 		return m_max;
@@ -62,6 +73,11 @@ public:
 		m_max = x;
 	}
 
+	void setMax(const Vec3& x)
+	{
+		setMax(Vec4(x, 0.0f));
+	}
+
 	/// Copy.
 	Aabb& operator=(const Aabb& b)
 	{

+ 2 - 37
src/anki/collision/Frustum.cpp

@@ -18,38 +18,31 @@ Frustum& Frustum::operator=(const Frustum& b)
 	m_planesL = b.m_planesL;
 	m_planesW = b.m_planesW;
 	m_trf = b.m_trf;
-	m_frustumDirty = b.m_frustumDirty;
 	return *this;
 }
 
 void Frustum::accept(MutableVisitor& v)
 {
-	update();
 	CompoundShape::accept(v);
 }
 
 void Frustum::accept(ConstVisitor& v) const
 {
-	update();
 	CompoundShape::accept(v);
 }
 
 F32 Frustum::testPlane(const Plane& p) const
 {
-	update();
 	return CompoundShape::testPlane(p);
 }
 
 void Frustum::computeAabb(Aabb& aabb) const
 {
-	update();
 	CompoundShape::computeAabb(aabb);
 }
 
 Bool Frustum::insideFrustum(const CollisionShape& b) const
 {
-	update();
-
 	for(const Plane& plane : m_planesW)
 	{
 		if(b.testPlane(plane) < 0.0)
@@ -70,39 +63,11 @@ void Frustum::transform(const Transform& trf)
 void Frustum::resetTransform(const Transform& trf)
 {
 	m_trf = trf;
-
-	if(m_frustumDirty)
-	{
-		// Update everything
-		updateInternal();
-	}
-	else
-	{
-		// Inform the child about the change
-		onTransform();
-
-		// Transform the planes
-		for(U i = 0; i < m_planesL.getSize(); ++i)
-		{
-			m_planesW[i] = m_planesL[i].getTransformed(m_trf);
-		}
-	}
-}
-
-void Frustum::update() const
-{
-	Frustum& self = *const_cast<Frustum*>(this);
-	LockGuard<SpinLock> lock(self.m_lock);
-	if(self.m_frustumDirty)
-	{
-		self.updateInternal();
-	}
+	update();
 }
 
-void Frustum::updateInternal()
+void Frustum::update()
 {
-	ANKI_ASSERT(m_frustumDirty);
-	m_frustumDirty = false;
 	recalculate();
 
 	// Transform derived

+ 17 - 18
src/anki/collision/Frustum.h

@@ -60,20 +60,22 @@ public:
 	{
 		return m_near;
 	}
+
 	void setNear(const F32 x)
 	{
 		m_near = x;
-		m_frustumDirty = true;
+		update();
 	}
 
 	F32 getFar() const
 	{
 		return m_far;
 	}
+
 	void setFar(const F32 x)
 	{
 		m_far = x;
-		m_frustumDirty = true;
+		update();
 	}
 
 	const Transform& getTransform() const
@@ -124,9 +126,6 @@ protected:
 	/// Keep the transformation.
 	Transform m_trf = Transform::getIdentity();
 
-	/// It's true when the frustum changed
-	Bool8 m_frustumDirty = true;
-
 	/// Called when a viewing variable changes. It recalculates the planes and the other variables.
 	virtual void recalculate() = 0;
 
@@ -134,15 +133,13 @@ protected:
 	virtual void onTransform() = 0;
 
 	/// Recalculate the frustum and transform the derived data.
-	void update() const;
-	void updateInternal();
+	void update();
 
 	/// Copy
 	Frustum& operator=(const Frustum& b);
 
 private:
 	FrustumType m_type;
-	SpinLock m_lock;
 };
 
 /// Frustum shape for perspective cameras
@@ -174,7 +171,7 @@ public:
 	void setFovX(F32 ang)
 	{
 		m_fovX = ang;
-		m_frustumDirty = true;
+		update();
 	}
 
 	/// Get FOV on Y axis.
@@ -185,7 +182,7 @@ public:
 	void setFovY(F32 ang)
 	{
 		m_fovY = ang;
-		m_frustumDirty = true;
+		update();
 	}
 
 	/// Set all the parameters and recalculate the planes and shape
@@ -199,12 +196,11 @@ public:
 		m_fovY = fovY;
 		m_near = near;
 		m_far = far;
-		m_frustumDirty = true;
+		update();
 	}
 
 	const Array<Vec4, 5>& getPoints() const
 	{
-		update();
 		return m_pointsW;
 	}
 
@@ -269,40 +265,44 @@ public:
 	{
 		return m_left;
 	}
+
 	void setLeft(F32 f)
 	{
 		m_left = f;
-		m_frustumDirty = true;
+		update();
 	}
 
 	F32 getRight() const
 	{
 		return m_right;
 	}
+
 	void setRight(F32 f)
 	{
 		m_right = f;
-		m_frustumDirty = true;
+		update();
 	}
 
 	F32 getTop() const
 	{
 		return m_top;
 	}
+
 	void setTop(F32 f)
 	{
 		m_top = f;
-		m_frustumDirty = true;
+		update();
 	}
 
 	F32 getBottom() const
 	{
 		return m_bottom;
 	}
+
 	void setBottom(F32 f)
 	{
 		m_bottom = f;
-		m_frustumDirty = true;
+		update();
 	}
 
 	/// Set all
@@ -315,13 +315,12 @@ public:
 		m_far = far;
 		m_top = top;
 		m_bottom = bottom;
-		m_frustumDirty = true;
+		update();
 	}
 
 	/// Needed for debug drawing
 	const Obb& getObb() const
 	{
-		update();
 		return m_obbW;
 	}
 

+ 8 - 6
src/anki/core/App.cpp

@@ -16,6 +16,7 @@
 #include <anki/core/NativeWindow.h>
 #include <anki/input/Input.h>
 #include <anki/scene/SceneGraph.h>
+#include <anki/renderer/RenderQueue.h>
 #include <anki/resource/ResourceManager.h>
 #include <anki/physics/PhysicsWorld.h>
 #include <anki/renderer/MainRenderer.h>
@@ -24,6 +25,7 @@
 #include <anki/resource/AsyncLoader.h>
 #include <anki/core/StagingGpuMemoryManager.h>
 #include <anki/ui/UiManager.h>
+#include <anki/ui/Canvas.h>
 
 #if ANKI_OS == ANKI_OS_ANDROID
 #	include <android_native_app_glue.h>
@@ -337,16 +339,16 @@ void App::cleanup()
 	}
 
 #if ANKI_ENABLE_TRACE
-	if(TracerSingleton::get().isInitialized())
+	if(CoreTracerSingleton::get().isInitialized())
 	{
 		StringAuto fname(m_heapAlloc);
 		fname.sprintf("%s/trace", m_settingsDir.cstr());
 		ANKI_CORE_LOGI("Will dump trace files: %s", fname.cstr());
-		if(TracerSingleton::get().flush(fname.toCString()))
+		if(CoreTracerSingleton::get().flush(fname.toCString()))
 		{
 			ANKI_CORE_LOGE("Ignoring error from the tracer");
 		}
-		TracerSingleton::destroy();
+		CoreTracerSingleton::destroy();
 	}
 #endif
 
@@ -375,8 +377,8 @@ Error App::initInternal(const ConfigSet& config_, AllocAlignedCallback allocCb,
 	m_heapAlloc = HeapAllocator<U8>(m_allocCb, m_allocCbData);
 
 #if ANKI_ENABLE_TRACE
-	TracerSingleton::get().init(m_heapAlloc);
-	TracerSingleton::get().newFrame(0);
+	CoreTracerSingleton::get().init(m_heapAlloc);
+	CoreTracerSingleton::get().newFrame(0);
 #endif
 
 	ANKI_CHECK(initDirs(config));
@@ -620,7 +622,7 @@ Error App::mainLoop()
 	{
 #if ANKI_ENABLE_TRACE
 		static U64 frame = 1;
-		TracerSingleton::get().newFrame(frame++);
+		CoreTracerSingleton::get().newFrame(frame++);
 #endif
 		ANKI_TRACE_START_EVENT(FRAME);
 		const Second startTime = HighRezTimer::getCurrentTime();

+ 1 - 1
src/anki/core/Config.cpp

@@ -70,7 +70,7 @@ Config::Config()
 	newOption("core.storagePerFrameMemorySize", 16_MB);
 	newOption("core.vertexPerFrameMemorySize", 10_MB);
 	newOption("core.textureBufferPerFrameMemorySize", 1_MB);
-	newOption("core.mainThreadCount", getCpuCoresCount() / 2);
+	newOption("core.mainThreadCount", getCpuCoresCount() / 2 - 1);
 	newOption("core.displayStats", false);
 	newOption("core.clearCaches", false);
 }

+ 99 - 4
src/anki/core/Trace.h

@@ -8,13 +8,106 @@
 #include <anki/core/Common.h>
 #include <anki/util/Tracer.h>
 
+namespace anki
+{
+
+/// @addtogroup core
+/// @{
+
+/// Core tracer.
+class CoreTracer
+{
+public:
+	Tracer m_tracer;
+	Bool m_enabled = false;
+
+	/// @copydoc Tracer::init
+	void init(GenericMemoryPoolAllocator<U8> alloc)
+	{
+		m_tracer.init(alloc);
+	}
+
+	/// @copydoc Tracer::isInitialized
+	Bool isInitialized() const
+	{
+		return m_tracer.isInitialized();
+	}
+
+	/// @copydoc Tracer::beginEvent
+	ANKI_USE_RESULT TracerEventHandle beginEvent()
+	{
+		if(m_enabled)
+		{
+			return m_tracer.beginEvent();
+		}
+
+		return nullptr;
+	}
+
+	/// @copydoc Tracer::endEvent
+	void endEvent(const char* eventName, TracerEventHandle event)
+	{
+		if(event != nullptr)
+		{
+			m_tracer.endEvent(eventName, event);
+		}
+	}
+
+	/// @copydoc Tracer::increaseCounter
+	void increaseCounter(const char* counterName, U64 value)
+	{
+		if(m_enabled)
+		{
+			m_tracer.increaseCounter(counterName, value);
+		}
+	}
+
+	/// @copydoc Tracer::newFrame
+	void newFrame(U64 frame)
+	{
+		if(m_enabled)
+		{
+			m_tracer.newFrame(frame);
+		}
+	}
+
+	/// @copydoc Tracer::flush
+	ANKI_USE_RESULT Error flush(CString filename)
+	{
+		return m_tracer.flush(filename);
+	}
+};
+
+using CoreTracerSingleton = Singleton<CoreTracer>;
+
+class CoreTraceScopedEvent
+{
+public:
+	CoreTraceScopedEvent(const char* name)
+		: m_name(name)
+		, m_tracer(&CoreTracerSingleton::get())
+	{
+		m_handle = m_tracer->beginEvent();
+	}
+
+	~CoreTraceScopedEvent()
+	{
+		m_tracer->endEvent(m_name, m_handle);
+	}
+
+private:
+	const char* m_name;
+	TracerEventHandle m_handle;
+	CoreTracer* m_tracer;
+};
+
 /// @name Trace macros.
 /// @{
 #if ANKI_ENABLE_TRACE
-#	define ANKI_TRACE_START_EVENT(name_) TracerEventHandle _teh##name_ = TracerSingleton::get().beginEvent()
-#	define ANKI_TRACE_STOP_EVENT(name_) TracerSingleton::get().endEvent(#	name_, _teh##name_)
-#	define ANKI_TRACE_SCOPED_EVENT(name_) TraceScopedEvent _tse##name_(#	name_)
-#	define ANKI_TRACE_INC_COUNTER(name_, val_) TracerSingleton::get().increaseCounter(#	name_, val_)
+#	define ANKI_TRACE_START_EVENT(name_) TracerEventHandle _teh##name_ = CoreTracerSingleton::get().beginEvent()
+#	define ANKI_TRACE_STOP_EVENT(name_) CoreTracerSingleton::get().endEvent(#	name_, _teh##name_)
+#	define ANKI_TRACE_SCOPED_EVENT(name_) CoreTraceScopedEvent _tse##name_(#	name_)
+#	define ANKI_TRACE_INC_COUNTER(name_, val_) CoreTracerSingleton::get().increaseCounter(#	name_, val_)
 #else
 #	define ANKI_TRACE_START_EVENT(name_) ((void)0)
 #	define ANKI_TRACE_STOP_EVENT(name_) ((void)0)
@@ -22,3 +115,5 @@
 #	define ANKI_TRACE_INC_COUNTER(name_, val_) ((void)0)
 #endif
 /// @}
+
+} // end namespace anki

+ 1 - 0
src/anki/gr/Common.h

@@ -367,6 +367,7 @@ private:
 /// Compute max number of mipmaps for a 2D texture.
 inline U computeMaxMipmapCount2d(U w, U h, U minSizeOfLastMip = 1)
 {
+	ANKI_ASSERT(w > minSizeOfLastMip && h > minSizeOfLastMip);
 	U s = (w < h) ? w : h;
 	U count = 0;
 	while(s >= minSizeOfLastMip)

+ 14 - 0
src/anki/math/Functions.h

@@ -85,6 +85,20 @@ inline T mod(const T x, const T y)
 	return x - y * std::floor(x / y);
 }
 
+/// Like GLSL's mix.
+template<typename T, typename Y>
+inline T mix(T x, T y, Y factor)
+{
+	return x * (T(1) - factor) + y * factor;
+}
+
+/// Like GLSL's modf
+template<typename T>
+inline T modf(T x, T& intPart)
+{
+	return std::modf(x, &intPart);
+}
+
 /// The same as abs/fabs. For ints and floats.
 template<typename T>
 T absolute(const T f);

+ 6 - 0
src/anki/math/Vec4.h

@@ -214,6 +214,12 @@ void TVec4<F32>::Base::normalize();
 template<>
 TVec4<F32> TVec4<F32>::cross(const TVec4<F32>& b) const;
 
+template<>
+TVec4<F32> TVec4<F32>::Base::min(const TVec4<F32>& b) const;
+
+template<>
+TVec4<F32> TVec4<F32>::Base::max(const TVec4<F32>& b) const;
+
 template<>
 TVec4<F32> TVec4<F32>::Base::getAbs() const;
 

+ 14 - 0
src/anki/math/Vec4.inl.h

@@ -138,6 +138,20 @@ inline TVec4<F32> TVec4<F32>::cross(const TVec4<F32>& b) const
 	return TVec4<F32>(_mm_sub_ps(tmp0, tmp1));
 }
 
+template<>
+inline TVec4<F32> TVec4<F32>::Base::min(const TVec4<F32>& b) const
+{
+	__m128 out = _mm_min_ps(m_simd, b.m_simd);
+	return TVec4<F32>(out);
+}
+
+template<>
+inline TVec4<F32> TVec4<F32>::Base::max(const TVec4<F32>& b) const
+{
+	__m128 out = _mm_max_ps(m_simd, b.m_simd);
+	return TVec4<F32>(out);
+}
+
 template<>
 inline F32 TVec4<F32>::Base::dot(const TVec4<F32>& b) const
 {

+ 2 - 2
src/anki/renderer/Common.h

@@ -68,8 +68,8 @@ const U BLOOM_FRACTION = 4;
 /// Volumetric size is rendererSize/VOLUMETRIC_FRACTION.
 const U VOLUMETRIC_FRACTION = 4;
 
-/// Number of mipmaps of the HZ map.
-const U HIERARCHICAL_Z_MIPMAP_COUNT = 4;
+/// Used to calculate the mipmap count of the HiZ map.
+const U HIERARCHICAL_Z_MIN_HEIGHT = 80;
 
 const TextureSubresourceInfo HIZ_HALF_DEPTH(TextureSurfaceInfo(0, 0, 0, 0));
 const TextureSubresourceInfo HIZ_QUARTER_DEPTH(TextureSurfaceInfo(1, 0, 0, 0));

+ 42 - 150
src/anki/renderer/DebugDrawer.cpp

@@ -17,143 +17,69 @@
 namespace anki
 {
 
-DebugDrawer::DebugDrawer()
-{
-}
-
-DebugDrawer::~DebugDrawer()
-{
-}
-
 Error DebugDrawer::init(Renderer* r)
 {
+	ANKI_ASSERT(r);
 	m_r = r;
-	GrManager& gr = r->getGrManager();
 
 	// Create the prog and shaders
-	ANKI_CHECK(r->getResourceManager().loadResource("programs/Dbg.ankiprog", m_prog));
+	ANKI_CHECK(r->getResourceManager().loadResource("programs/SceneDebug.ankiprog", m_prog));
+	ShaderProgramResourceConstantValueInitList<1> consts(m_prog);
+	consts.add("INSTANCE_COUNT", 1u);
+	ShaderProgramResourceMutationInitList<2> mutations(m_prog);
+	mutations.add("COLOR_TEXTURE", 0).add("DITHERED_DEPTH_TEST", 0);
+
 	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variant);
+	m_prog->getOrCreateVariant(mutations.get(), consts.get(), variant);
 	m_grProg = variant->getProgram();
 
-	// Create the vert buffs
-	for(BufferPtr& v : m_vertBuff)
-	{
-		v = gr.newBuffer(BufferInitInfo(
-			sizeof(Vertex) * MAX_VERTS_PER_FRAME, BufferUsageBit::VERTEX, BufferMapAccessBit::WRITE, "DbgDrawer"));
-	}
-
-	m_mMat.setIdentity();
-	m_vpMat.setIdentity();
-	m_mvpMat.setIdentity();
-
 	return Error::NONE;
 }
 
-void DebugDrawer::prepareFrame(CommandBufferPtr& jobs)
-{
-	m_cmdb = jobs;
-
-	U frame = m_r->getFrameCount() % MAX_FRAMES_IN_FLIGHT;
-	void* mapped = m_vertBuff[frame]->map(0, MAX_VERTS_PER_FRAME * sizeof(Vertex), BufferMapAccessBit::WRITE);
-	m_clientVerts = WeakArray<Vertex>(static_cast<Vertex*>(mapped), MAX_VERTS_PER_FRAME);
-
-	m_cmdb->bindVertexBuffer(0, m_vertBuff[frame], 0, 2 * sizeof(Vec4));
-	m_cmdb->setVertexAttribute(0, 0, Format::R32G32B32A32_SFLOAT, 0);
-	m_cmdb->setVertexAttribute(1, 0, Format::R32G32B32A32_SFLOAT, sizeof(Vec4));
-
-	m_cmdb->bindShaderProgram(m_grProg);
-
-	m_frameVertCount = 0;
-	m_crntDrawVertCount = 0;
-}
-
-void DebugDrawer::finishFrame()
-{
-	U frame = m_r->getFrameCount() % MAX_FRAMES_IN_FLIGHT;
-	m_vertBuff[frame]->unmap();
-
-	flush();
-
-	// Restore state
-	m_cmdb->setDepthCompareOperation(CompareOperation::ALWAYS);
-
-	m_cmdb = CommandBufferPtr(); // Release command buffer
-}
-
-void DebugDrawer::setModelMatrix(const Mat4& m)
-{
-	m_mMat = m;
-	m_mvpMat = m_vpMat * m_mMat;
-}
-
-void DebugDrawer::setViewProjectionMatrix(const Mat4& m)
-{
-	m_vpMat = m;
-	m_mvpMat = m_vpMat * m_mMat;
-}
-
-void DebugDrawer::begin(PrimitiveTopology topology)
+void DebugDrawer::flush()
 {
-	ANKI_ASSERT(topology == PrimitiveTopology::LINES || topology == PrimitiveTopology::TRIANGLES);
-
-	if(topology != m_primitive)
+	if(m_cachedPositionCount > 0)
 	{
-		flush();
-	}
+		m_cmdb->bindShaderProgram(m_grProg);
 
-	m_primitive = topology;
-}
-
-void DebugDrawer::end()
-{
-}
+		// Set vertex state
+		const U32 size = m_cachedPositionCount * sizeof(Vec3);
 
-void DebugDrawer::pushBackVertex(const Vec3& pos)
-{
-	if(m_frameVertCount < MAX_VERTS_PER_FRAME)
-	{
-		m_clientVerts[m_frameVertCount].m_position = m_mvpMat * Vec4(pos, 1.0);
-		m_clientVerts[m_frameVertCount].m_color = Vec4(m_crntCol, 1.0);
+		StagingGpuMemoryToken token;
+		void* mem = m_r->getStagingGpuMemoryManager().allocateFrame(size, StagingGpuMemoryType::VERTEX, token);
+		memcpy(mem, &m_cachedPositions[0], size);
 
-		++m_frameVertCount;
-		++m_crntDrawVertCount;
-	}
-	else
-	{
-		ANKI_R_LOGW("Increase DebugDrawer::MAX_VERTS_PER_FRAME");
-	}
-}
+		m_cmdb->bindVertexBuffer(0, token.m_buffer, token.m_offset, sizeof(Vec3));
+		m_cmdb->setVertexAttribute(0, 0, Format::R32G32B32_SFLOAT, 0);
 
-void DebugDrawer::flush()
-{
-	if(m_crntDrawVertCount > 0)
-	{
-		if(m_primitive == PrimitiveTopology::LINES)
-		{
-			ANKI_ASSERT((m_crntDrawVertCount % 2) == 0);
-		}
-		else
+		// Set uniform state
+		struct Uniforms
 		{
-			ANKI_ASSERT((m_crntDrawVertCount % 3) == 0);
-		}
+			Mat4 m_mvp;
+			Vec4 m_color;
+		};
 
-		m_cmdb->setDepthCompareOperation((m_depthTestEnabled) ? CompareOperation::LESS : CompareOperation::ALWAYS);
+		Uniforms* uniforms = static_cast<Uniforms*>(
+			m_r->getStagingGpuMemoryManager().allocateFrame(sizeof(Uniforms), StagingGpuMemoryType::UNIFORM, token));
+		uniforms->m_mvp = m_mvpMat;
+		uniforms->m_color = m_crntCol;
 
-		U firstVert = m_frameVertCount - m_crntDrawVertCount;
-		m_cmdb->drawArrays(m_primitive, m_crntDrawVertCount, 1, firstVert);
+		m_cmdb->bindUniformBuffer(1, 0, token.m_buffer, token.m_offset, token.m_range);
 
-		m_crntDrawVertCount = 0;
+		// Draw
+		m_cmdb->drawArrays(m_topology, m_cachedPositionCount);
+
+		// Other
+		m_cachedPositionCount = 0;
 	}
 }
 
 void DebugDrawer::drawLine(const Vec3& from, const Vec3& to, const Vec4& color)
 {
 	setColor(color);
-	begin(PrimitiveTopology::LINES);
+	setTopology(PrimitiveTopology::LINES);
 	pushBackVertex(from);
 	pushBackVertex(to);
-	end();
 }
 
 void DebugDrawer::drawGrid()
@@ -168,8 +94,7 @@ void DebugDrawer::drawGrid()
 	const F32 GRID_HALF_SIZE = ((NUM - 1) * SPACE / 2);
 
 	setColor(col0);
-
-	begin(PrimitiveTopology::LINES);
+	setTopology(PrimitiveTopology::LINES);
 
 	for(I x = -NUM / 2 * SPACE; x < NUM / 2 * SPACE; x += SPACE)
 	{
@@ -211,20 +136,14 @@ void DebugDrawer::drawGrid()
 			pushBackVertex(Vec3(GRID_HALF_SIZE, 0.0, x));
 		}
 	}
-
-	// render
-	end();
 }
 
 void DebugDrawer::drawSphere(F32 radius, I complexity)
 {
-#if 1
 	Mat4 oldMMat = m_mMat;
-	Mat4 oldVpMat = m_vpMat;
 
 	setModelMatrix(m_mMat * Mat4(Vec4(0.0, 0.0, 0.0, 1.0), Mat3::getIdentity(), radius));
-
-	begin(PrimitiveTopology::LINES);
+	setTopology(PrimitiveTopology::LINES);
 
 	// Pre-calculate the sphere points
 	F32 fi = PI / complexity;
@@ -253,11 +172,7 @@ void DebugDrawer::drawSphere(F32 radius, I complexity)
 		prev = p;
 	}
 
-	end();
-
-	m_mMat = oldMMat;
-	m_vpMat = oldVpMat;
-#endif
+	setModelMatrix(oldMMat);
 }
 
 void DebugDrawer::drawCube(F32 size)
@@ -278,12 +193,11 @@ void DebugDrawer::drawCube(F32 size)
 
 	static const Array<U32, 24> indeces = {{0, 1, 1, 2, 2, 3, 3, 0, 4, 5, 5, 6, 6, 7, 7, 4, 0, 4, 1, 5, 2, 6, 3, 7}};
 
-	begin(PrimitiveTopology::LINES);
+	setTopology(PrimitiveTopology::LINES);
 	for(U32 id : indeces)
 	{
 		pushBackVertex(points[id]);
 	}
-	end();
 }
 
 void CollisionDebugDrawer::visit(const Sphere& sphere)
@@ -322,10 +236,9 @@ void CollisionDebugDrawer::visit(const Plane& plane)
 void CollisionDebugDrawer::visit(const LineSegment& ls)
 {
 	m_dbg->setModelMatrix(Mat4::getIdentity());
-	m_dbg->begin(PrimitiveTopology::LINES);
+	m_dbg->setTopology(PrimitiveTopology::LINES);
 	m_dbg->pushBackVertex(ls.getOrigin().xyz());
 	m_dbg->pushBackVertex((ls.getOrigin() + ls.getDirection()).xyz());
-	m_dbg->end();
 }
 
 void CollisionDebugDrawer::visit(const Aabb& aabb)
@@ -372,12 +285,11 @@ void CollisionDebugDrawer::visit(const Frustum& f)
 
 		const U32 indeces[] = {0, 1, 0, 2, 0, 3, 0, 4, 1, 2, 2, 3, 3, 4, 4, 1};
 
-		m_dbg->begin(PrimitiveTopology::LINES);
+		m_dbg->setTopology(PrimitiveTopology::LINES);
 		for(U32 i = 0; i < sizeof(indeces) / sizeof(U32); i++)
 		{
 			m_dbg->pushBackVertex(points[indeces[i]]);
 		}
-		m_dbg->end();
 		break;
 	}
 	}
@@ -396,7 +308,7 @@ void CollisionDebugDrawer::visit(const CompoundShape& cs)
 void CollisionDebugDrawer::visit(const ConvexHullShape& hull)
 {
 	m_dbg->setModelMatrix(Mat4(hull.getTransform()));
-	m_dbg->begin(PrimitiveTopology::LINES);
+	m_dbg->setTopology(PrimitiveTopology::LINES);
 	const Vec4* points = hull.getPoints() + 1;
 	const Vec4* end = hull.getPoints() + hull.getPointsCount();
 	for(; points != end; ++points)
@@ -404,36 +316,16 @@ void CollisionDebugDrawer::visit(const ConvexHullShape& hull)
 		m_dbg->pushBackVertex(hull.getPoints()->xyz());
 		m_dbg->pushBackVertex(points->xyz());
 	}
-	m_dbg->end();
 }
 
 void PhysicsDebugDrawer::drawLines(const Vec3* lines, const U32 linesCount, const Vec4& color)
 {
-	m_dbg->begin(PrimitiveTopology::LINES);
+	m_dbg->setTopology(PrimitiveTopology::LINES);
 	m_dbg->setColor(color);
 	for(U i = 0; i < linesCount * 2; ++i)
 	{
 		m_dbg->pushBackVertex(lines[i]);
 	}
-	m_dbg->end();
-}
-
-void SceneDebugDrawer::draw(const RenderableQueueElement& r) const
-{
-	// TODO
-}
-
-void SceneDebugDrawer::draw(const PointLightQueueElement& light) const
-{
-	m_dbg->setColor(light.m_diffuseColor);
-	CollisionDebugDrawer coldraw(m_dbg);
-	Sphere sphere(light.m_worldPosition.xyz0(), light.m_radius);
-	sphere.accept(coldraw);
-}
-
-void SceneDebugDrawer::draw(const SpotLightQueueElement& light) const
-{
-	// TODO
 }
 
 } // end namespace anki

+ 49 - 69
src/anki/renderer/DebugDrawer.h

@@ -16,9 +16,6 @@
 namespace anki
 {
 
-// Forward
-class Renderer;
-
 /// @addtogroup renderer
 /// @{
 
@@ -26,84 +23,86 @@ class Renderer;
 class DebugDrawer
 {
 public:
-	DebugDrawer();
-	~DebugDrawer();
-
 	ANKI_USE_RESULT Error init(Renderer* r);
 
+	void prepareFrame(CommandBufferPtr& cmdb)
+	{
+		m_cmdb = cmdb;
+	}
+
+	void finishFrame()
+	{
+		flush();
+		m_cmdb.reset(nullptr);
+	}
+
 	void drawGrid();
 	void drawSphere(F32 radius, I complexity = 8);
 	void drawCube(F32 size = 1.0);
 	void drawLine(const Vec3& from, const Vec3& to, const Vec4& color);
 
-	void prepareFrame(CommandBufferPtr& jobs);
-
-	void finishFrame();
-
-	/// @name Render functions. Imitate the GL 1.1 immediate mode
-	/// @{
-	void begin(PrimitiveTopology topology); ///< Initiates the draw
-
-	void end(); ///< Draws
-
-	void pushBackVertex(const Vec3& pos); ///< Something like glVertex
+	void setTopology(PrimitiveTopology topology)
+	{
+		if(topology != m_topology)
+		{
+			flush();
+		}
+		m_topology = topology;
+	}
 
-	/// Something like glColor
-	void setColor(const Vec3& col)
+	void pushBackVertex(const Vec3& pos)
 	{
-		m_crntCol = col;
+		if((m_cachedPositionCount + 3) >= m_cachedPositions.getSize())
+		{
+			flush();
+			ANKI_ASSERT(m_cachedPositionCount == 0);
+		}
+		m_cachedPositions[m_cachedPositionCount++] = pos;
 	}
 
 	/// Something like glColor
 	void setColor(const Vec4& col)
 	{
-		m_crntCol = col.xyz();
+		if(m_crntCol != col)
+		{
+			flush();
+		}
+		m_crntCol = col;
 	}
 
-	void setModelMatrix(const Mat4& m);
-
-	void setViewProjectionMatrix(const Mat4& m);
-	/// @}
-
-	void setDepthTestEnabled(Bool enabled)
+	void setModelMatrix(const Mat4& m)
 	{
-		m_depthTestEnabled = enabled;
+		flush();
+		m_mMat = m;
+		m_mvpMat = m_vpMat * m_mMat;
 	}
 
-	Bool getDepthTestEnabled() const
+	void setViewProjectionMatrix(const Mat4& m)
 	{
-		return m_depthTestEnabled;
+		flush();
+		m_vpMat = m;
+		m_mvpMat = m_vpMat * m_mMat;
 	}
 
 private:
-	class Vertex
-	{
-	public:
-		Vec4 m_position;
-		Vec4 m_color;
-	};
-
-	static const U MAX_VERTS_PER_FRAME = 1024 * 1024;
-
 	Renderer* m_r;
 	ShaderProgramResourcePtr m_prog;
 	ShaderProgramPtr m_grProg;
-	Array<BufferPtr, MAX_FRAMES_IN_FLIGHT> m_vertBuff;
 
 	CommandBufferPtr m_cmdb;
-	WeakArray<Vertex> m_clientVerts;
 
-	Mat4 m_mMat;
-	Mat4 m_vpMat;
-	Mat4 m_mvpMat; ///< Optimization.
-	Vec3 m_crntCol = Vec3(1.0, 0.0, 0.0);
-	PrimitiveTopology m_primitive = PrimitiveTopology::LINES;
-	U32 m_frameVertCount = 0;
-	U32 m_crntDrawVertCount = 0;
+	// State
+	Mat4 m_mMat = Mat4::getIdentity();
+	Mat4 m_vpMat = Mat4::getIdentity();
+	Mat4 m_mvpMat = Mat4::getIdentity(); ///< Optimization.
+	Vec4 m_crntCol = Vec4(1.0f, 0.0f, 0.0f, 1.0f);
+	PrimitiveTopology m_topology = PrimitiveTopology::LINES;
 
-	DynamicArray<Vec3> m_sphereVerts;
+	static const U MAX_VERTS_BEFORE_FLUSH = 128;
+	Array<Vec3, MAX_VERTS_BEFORE_FLUSH> m_cachedPositions;
+	U32 m_cachedPositionCount = 0;
 
-	Bool8 m_depthTestEnabled = true;
+	DynamicArray<Vec3> m_sphereVerts;
 
 	void flush();
 };
@@ -152,25 +151,6 @@ public:
 private:
 	DebugDrawer* m_dbg; ///< The debug drawer
 };
-
-/// This is a drawer for some scene nodes that need debug
-class SceneDebugDrawer
-{
-public:
-	SceneDebugDrawer(DebugDrawer* d)
-		: m_dbg(d)
-	{
-	}
-
-	void draw(const RenderableQueueElement& r) const;
-
-	void draw(const PointLightQueueElement& light) const;
-
-	void draw(const SpotLightQueueElement& light) const;
-
-private:
-	DebugDrawer* m_dbg;
-};
 /// @}
 
 } // end namespace anki

+ 53 - 8
src/anki/renderer/DepthDownscale.cpp

@@ -12,6 +12,12 @@ namespace anki
 
 DepthDownscale::~DepthDownscale()
 {
+	m_passes.destroy(getAllocator());
+
+	if(m_copyToBuff.m_buffAddr)
+	{
+		m_copyToBuff.m_buff->unmap();
+	}
 }
 
 Error DepthDownscale::initInternal(const ConfigSet&)
@@ -19,13 +25,22 @@ Error DepthDownscale::initInternal(const ConfigSet&)
 	const U width = m_r->getWidth() / 2;
 	const U height = m_r->getHeight() / 2;
 
+	const U mipCount = computeMaxMipmapCount2d(width, height, HIERARCHICAL_Z_MIN_HEIGHT);
+
+	const U lastMipWidth = width >> (mipCount - 1);
+	const U lastMipHeight = height >> (mipCount - 1);
+
+	ANKI_R_LOGI("Initializing HiZ. Mip count %u, last mip size %ux%u", mipCount, lastMipWidth, lastMipHeight);
+
+	m_passes.create(getAllocator(), mipCount);
+
 	// Create RT descrs
 	m_depthRtDescr =
 		m_r->create2DRenderTargetDescription(width, height, GBUFFER_DEPTH_ATTACHMENT_PIXEL_FORMAT, "Half depth");
 	m_depthRtDescr.bake();
 
 	m_hizRtDescr = m_r->create2DRenderTargetDescription(width, height, Format::R32_SFLOAT, "HiZ");
-	m_hizRtDescr.m_mipmapCount = HIERARCHICAL_Z_MIPMAP_COUNT;
+	m_hizRtDescr.m_mipmapCount = mipCount;
 	m_hizRtDescr.bake();
 
 	// Create FB descr
@@ -35,7 +50,7 @@ Error DepthDownscale::initInternal(const ConfigSet&)
 	m_passes[0].m_fbDescr.m_depthStencilAttachment.m_aspect = DepthStencilAspectBit::DEPTH;
 	m_passes[0].m_fbDescr.bake();
 
-	for(U i = 1; i < HIERARCHICAL_Z_MIPMAP_COUNT; ++i)
+	for(U i = 1; i < m_passes.getSize(); ++i)
 	{
 		m_passes[i].m_fbDescr.m_colorAttachments[0].m_loadOperation = AttachmentLoadOperation::DONT_CARE;
 		m_passes[i].m_fbDescr.m_colorAttachments[0].m_surface.m_level = i;
@@ -46,21 +61,44 @@ Error DepthDownscale::initInternal(const ConfigSet&)
 	// Progs
 	ANKI_CHECK(getResourceManager().loadResource("programs/DepthDownscale.ankiprog", m_prog));
 
-	ShaderProgramResourceMutationInitList<2> mutations(m_prog);
-	mutations.add("TYPE", 0).add("SAMPLE_RESOLVE_TYPE", 1);
+	ShaderProgramResourceMutationInitList<3> mutations(m_prog);
+	mutations.add("COPY_TO_CLIENT", 0).add("TYPE", 0).add("SAMPLE_RESOLVE_TYPE", 2);
 
 	const ShaderProgramResourceVariant* variant;
 	m_prog->getOrCreateVariant(mutations.get(), variant);
 	m_passes[0].m_grProg = variant->getProgram();
 
-	for(U i = 1; i < HIERARCHICAL_Z_MIPMAP_COUNT; ++i)
+	for(U i = 1; i < m_passes.getSize(); ++i)
 	{
-		mutations[0].m_value = 1;
+		mutations[1].m_value = 1;
+
+		if(i == m_passes.getSize() - 1)
+		{
+			mutations[0].m_value = 1;
+		}
 
 		m_prog->getOrCreateVariant(mutations.get(), variant);
 		m_passes[i].m_grProg = variant->getProgram();
 	}
 
+	// Copy to buffer
+	{
+		m_copyToBuff.m_lastMipWidth = lastMipWidth;
+		m_copyToBuff.m_lastMipHeight = lastMipHeight;
+
+		// Create buffer
+		BufferInitInfo buffInit("HiZ Client");
+		buffInit.m_access = BufferMapAccessBit::READ;
+		buffInit.m_size = lastMipHeight * lastMipWidth * sizeof(F32);
+		buffInit.m_usage = BufferUsageBit::STORAGE_COMPUTE_WRITE;
+		m_copyToBuff.m_buff = getGrManager().newBuffer(buffInit);
+
+		m_copyToBuff.m_buffAddr = m_copyToBuff.m_buff->map(0, buffInit.m_size, BufferMapAccessBit::READ);
+
+		// Fill the buffer with 1.0f
+		memorySet(static_cast<F32*>(m_copyToBuff.m_buffAddr), 1.0f, lastMipHeight * lastMipWidth);
+	}
+
 	return Error::NONE;
 }
 
@@ -106,7 +144,7 @@ void DepthDownscale::populateRenderGraph(RenderingContext& ctx)
 	}
 
 	// Rest of the passes
-	for(U i = 1; i < HIERARCHICAL_Z_MIPMAP_COUNT; ++i)
+	for(U i = 1; i < m_passes.getSize(); ++i)
 	{
 		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passNames[i]);
 
@@ -130,6 +168,7 @@ void DepthDownscale::run(RenderPassWorkContext& rgraphCtx)
 
 	const U passIdx = m_runCtx.m_pass++;
 
+	cmdb->bindShaderProgram(m_passes[passIdx].m_grProg);
 	cmdb->setViewport(0, 0, m_r->getWidth() >> (passIdx + 1), m_r->getHeight() >> (passIdx + 1));
 
 	if(passIdx == 0)
@@ -150,7 +189,13 @@ void DepthDownscale::run(RenderPassWorkContext& rgraphCtx)
 		rgraphCtx.bindTextureAndSampler(0, 0, m_runCtx.m_hizRt, sampleSubresource, m_r->getNearestSampler());
 	}
 
-	cmdb->bindShaderProgram(m_passes[passIdx].m_grProg);
+	if(passIdx == m_passes.getSize() - 1)
+	{
+		UVec2 size(m_copyToBuff.m_lastMipWidth, m_copyToBuff.m_lastMipHeight);
+		cmdb->setPushConstants(&size, sizeof(size));
+
+		cmdb->bindStorageBuffer(0, 0, m_copyToBuff.m_buff, 0, m_copyToBuff.m_buff->getSize());
+	}
 
 	drawQuad(cmdb);
 

+ 22 - 1
src/anki/renderer/DepthDownscale.h

@@ -46,6 +46,19 @@ anki_internal:
 		return m_runCtx.m_hizRt;
 	}
 
+	/// Get the number of mipmaps. There is one downscale pass per mipmap so this is the number of passes.
+	U32 getMipmapCount() const
+	{
+		const U32 passCount = U32(m_passes.getSize());
+		return passCount;
+	}
+
+	/// Get access to the client (CPU) side copy of the depth map.
+	/// @param[out] depthValues Set to the mapped address of the copy buffer, viewed as F32 values.
+	/// @param[out] width Set to the width of the last mip.
+	/// @param[out] height Set to the height of the last mip.
+	void getClientDepthMapInfo(F32*& depthValues, U32& width, U32& height) const
+	{
+		ANKI_ASSERT(m_copyToBuff.m_buffAddr);
+		depthValues = static_cast<F32*>(m_copyToBuff.m_buffAddr);
+
+		width = m_copyToBuff.m_lastMipWidth;
+		height = m_copyToBuff.m_lastMipHeight;
+	}
+
 private:
 	RenderTargetDescription m_depthRtDescr;
 	RenderTargetDescription m_hizRtDescr;
@@ -58,7 +71,7 @@ private:
 		ShaderProgramPtr m_grProg;
 	};
 
-	Array<Pass, HIERARCHICAL_Z_MIPMAP_COUNT> m_passes;
+	DynamicArray<Pass> m_passes;
 
 	class
 	{
@@ -68,6 +81,14 @@ private:
 		U m_pass;
 	} m_runCtx; ///< Run context.
 
+	/// Everything needed to copy the last mip to a buffer that the client can read.
+	class
+	{
+	public:
+		BufferPtr m_buff; ///< The buffer that receives the depth values.
+		void* m_buffAddr = nullptr; ///< The address m_buff is mapped to.
+		U32 m_lastMipWidth = MAX_U32; ///< Width of the last mip.
+		U32 m_lastMipHeight = MAX_U32; ///< Height of the last mip.
+	} m_copyToBuff; ///< Copy to buffer members.
+
 	ANKI_USE_RESULT Error initInternal(const ConfigSet& cfg);
 
 	void run(RenderPassWorkContext& rgraphCtx);

+ 1 - 1
src/anki/renderer/Reflections.cpp

@@ -49,7 +49,7 @@ Error Reflections::initInternal(const ConfigSet& cfg)
 	consts.add("WORKGROUP_SIZE", UVec2(m_workgroupSize[0], m_workgroupSize[1]));
 	consts.add("MAX_STEPS", U32(64));
 	consts.add("LIGHT_BUFFER_MIP_COUNT", U32(m_r->getDownscaleBlur().getMipmapCount()));
-	consts.add("HIZ_MIP_COUNT", U32(HIERARCHICAL_Z_MIPMAP_COUNT));
+	consts.add("HIZ_MIP_COUNT", U32(m_r->getDepthDownscale().getMipmapCount()));
 	consts.add("CLUSTER_COUNT_X", U32(cfg.getNumber("r.clusterSizeX")));
 	consts.add("CLUSTER_COUNT_Y", U32(cfg.getNumber("r.clusterSizeY")));
 	consts.add("CLUSTER_COUNT_Z", U32(cfg.getNumber("r.clusterSizeZ")));

+ 6 - 0
src/anki/renderer/RenderQueue.h

@@ -178,6 +178,9 @@ public:
 
 static_assert(std::is_trivially_destructible<UiQueueElement>::value == true, "Should be trivially destructible");
 
+/// A callback to fill a coverage buffer.
+using FillCoverageBufferCallback = void (*)(void* userData, F32* depthValues, U32 width, U32 height);
+
 /// The render queue. This is what the renderer is fed to render.
 class RenderQueue : public RenderingMatrices
 {
@@ -199,6 +202,9 @@ public:
 
 	F32 m_cameraNear;
 	F32 m_cameraFar;
+
+	FillCoverageBufferCallback m_fillCoverageBufferCallback = nullptr;
+	void* m_fillCoverageBufferCallbackUserData = nullptr;
 };
 
 static_assert(std::is_trivially_destructible<RenderQueue>::value == true, "Should be trivially destructible");

+ 11 - 0
src/anki/renderer/Renderer.cpp

@@ -300,6 +300,17 @@ void Renderer::finalize(const RenderingContext& ctx)
 	++m_frameCount;
 	m_prevViewProjMat = ctx.m_renderQueue->m_viewProjectionMatrix;
 	m_prevCamTransform = ctx.m_renderQueue->m_cameraTransform;
+
+	// Inform about the HiZ map. Do it as late as possible
+	if(ctx.m_renderQueue->m_fillCoverageBufferCallback)
+	{
+		F32* depthValues;
+		U32 width;
+		U32 height;
+		m_depth->getClientDepthMapInfo(depthValues, width, height);
+		ctx.m_renderQueue->m_fillCoverageBufferCallback(
+			ctx.m_renderQueue->m_fillCoverageBufferCallbackUserData, depthValues, width, height);
+	}
 }
 
 Vec3 Renderer::unproject(

+ 1 - 8
src/anki/resource/ShaderProgramResource.cpp

@@ -1134,14 +1134,7 @@ void ShaderProgramResource::initVariant(ConstWeakArray<ShaderProgramResourceMuta
 		progInf.m_shaders[i] = getManager().getGrManager().newShader(inf);
 	}
 
-	if(!m_compute)
-	{
-		variant.m_prog = getManager().getGrManager().newShaderProgram(progInf);
-	}
-	else
-	{
-		variant.m_prog = getManager().getGrManager().newShaderProgram(progInf);
-	}
+	variant.m_prog = getManager().getGrManager().newShaderProgram(progInf);
 }
 
 } // end namespace anki

+ 226 - 12
src/anki/scene/Octree.cpp

@@ -7,10 +7,54 @@
 #include <anki/collision/Tests.h>
 #include <anki/collision/Aabb.h>
 #include <anki/collision/Frustum.h>
+#include <anki/util/ThreadHive.h>
+#include <new>
 
 namespace anki
 {
 
+/// Return a heatmap color.
+/// The factor is mapped to a black -> blue -> green -> yellow -> red ramp made of 4 equal segments.
+/// @param factor The position on the ramp. 0.0 is black and 1.0 is red. Values outside [0.0, 1.0] are clamped to the
+///               ends of the ramp.
+/// @return The RGB color.
+static Vec3 heatmap(F32 factor)
+{
+	// Handle the ends explicitly. A factor of exactly 1.0 would otherwise land in the last segment with a zero
+	// fractional part and give yellow instead of red. Bigger factors would wrap around inside the last segment and
+	// negative ones would extrapolate the first
+	if(factor <= 0.0f)
+	{
+		return Vec3(0.0, 0.0, 0.0);
+	}
+	else if(factor >= 1.0f)
+	{
+		return Vec3(1.0, 0.0, 0.0);
+	}
+
+	// The integer part selects the segment and the fractional part is the position inside it
+	F32 intPart;
+	const F32 fractional = modf(factor * 4.0f, intPart);
+
+	if(intPart < 1.0)
+	{
+		return mix(Vec3(0.0, 0.0, 0.0), Vec3(0.0, 0.0, 1.0), fractional);
+	}
+	else if(intPart < 2.0)
+	{
+		return mix(Vec3(0.0, 0.0, 1.0), Vec3(0.0, 1.0, 0.0), fractional);
+	}
+	else if(intPart < 3.0)
+	{
+		return mix(Vec3(0.0, 1.0, 0.0), Vec3(1.0, 1.0, 0.0), fractional);
+	}
+	else
+	{
+		return mix(Vec3(1.0, 1.0, 0.0), Vec3(1.0, 0.0, 0.0), fractional);
+	}
+}
+
+class Octree::GatherParallelCtx // State shared by all the tasks of a single gatherVisibleParallel() call.
+{
+public:
+	Octree* m_octree = nullptr; ///< The octree that is being walked.
+	SpinLock m_lock; ///< Held by the tasks while they append to m_out.
+	const Frustum* m_frustum = nullptr; ///< The frustum the boxes of the leafs are tested against.
+	U32 m_testId = MAX_U32; ///< Given to OctreePlaceable::alreadyVisited().
+	OctreeNodeVisibilityTestCallback m_testCallback = nullptr; ///< Optional extra test, run after the frustum test.
+	void* m_testCallbackUserData = nullptr; ///< First argument of m_testCallback.
+	DynamicArrayAuto<void*>* m_out = nullptr; ///< Receives the m_userData of the gathered placeables.
+};
+
+class Octree::GatherParallelTaskCtx // The argument of a single gather task. One task handles one leaf.
+{
+public:
+	GatherParallelCtx* m_ctx = nullptr; ///< The state that is shared between all the tasks of the gather.
+	Leaf* m_leaf = nullptr; ///< The leaf this task will process.
+};
+
 Octree::~Octree()
 {
 	ANKI_ASSERT(m_placeableCount == 0);
@@ -31,8 +75,7 @@ void Octree::init(const Vec3& sceneAabbMin, const Vec3& sceneAabbMax, U32 maxDep
 void Octree::place(const Aabb& volume, OctreePlaceable* placeable)
 {
 	ANKI_ASSERT(placeable);
-	ANKI_ASSERT(testCollisionShapes(volume, Aabb(Vec4(Vec3(m_sceneAabbMin), 0.0f), Vec4(Vec3(m_sceneAabbMax), 0.0f)))
-				&& "volume is outside the scene");
+	ANKI_ASSERT(testCollisionShapes(volume, Aabb(m_sceneAabbMin, m_sceneAabbMax)) && "volume is outside the scene");
 
 	LockGuard<Mutex> lock(m_globalMtx);
 
@@ -58,18 +101,36 @@ void Octree::remove(OctreePlaceable& placeable)
 	removeInternal(placeable);
 }
 
+/// Check if the volume covers the whole leaf.
+/// @note Despite the name the test is true when the box of the leaf lies within the box of the volume on all 3 axes
+///       (touching bounds count as covered), not the other way around. placeRecursive() uses it to stop descending.
+Bool Octree::volumeTotallyInsideLeaf(const Aabb& volume, const Leaf& leaf)
+{
+	const Vec4& volumeMin = volume.getMin();
+	const Vec4& volumeMax = volume.getMax();
+	const Vec3& leafMin = leaf.m_aabbMin;
+	const Vec3& leafMax = leaf.m_aabbMax;
+
+	// Per axis: the volume should start at or before the leaf and end at or after it
+	const Bool coversX = volumeMin.x() <= leafMin.x() && volumeMax.x() >= leafMax.x();
+	const Bool coversY = volumeMin.y() <= leafMin.y() && volumeMax.y() >= leafMax.y();
+	const Bool coversZ = volumeMin.z() <= leafMin.z() && volumeMax.z() >= leafMax.z();
+
+	return coversX && coversY && coversZ;
+}
+
 void Octree::placeRecursive(const Aabb& volume, OctreePlaceable* placeable, Leaf* parent, U32 depth)
 {
 	ANKI_ASSERT(placeable);
 	ANKI_ASSERT(parent);
-	ANKI_ASSERT(testCollisionShapes(volume, Aabb(Vec4(parent->m_aabbMin, 0.0f), Vec4(parent->m_aabbMax, 0.0f)))
-				&& "Should be inside");
+	ANKI_ASSERT(testCollisionShapes(volume, Aabb(parent->m_aabbMin, parent->m_aabbMax)) && "Should be inside");
 
-	if(depth == m_maxDepth)
+	if(depth == m_maxDepth || volumeTotallyInsideLeaf(volume, *parent))
 	{
 		// Need to stop and bin the placeable to the leaf
 
 		// Checks
+#if ANKI_ASSERTS_ENABLED
 		for(const LeafNode& node : placeable->m_leafs)
 		{
 			ANKI_ASSERT(node.m_leaf != parent && "Already binned. That's wrong");
@@ -79,6 +140,7 @@ void Octree::placeRecursive(const Aabb& volume, OctreePlaceable* placeable, Leaf
 		{
 			ANKI_ASSERT(node.m_placeable != placeable);
 		}
+#endif
 
 		// Connect placeable and leaf
 		placeable->m_leafs.pushBack(newLeafNode(parent));
@@ -235,6 +297,7 @@ void Octree::removeInternal(OctreePlaceable& placeable)
 
 			// Iterate the placeables of the leaf
 			Bool found = false;
+			(void)found;
 			for(PlaceableNode& placeableNode : leafNode.m_leaf->m_placeables)
 			{
 				if(placeableNode.m_placeable == &placeable)
@@ -262,8 +325,12 @@ void Octree::removeInternal(OctreePlaceable& placeable)
 	}
 }
 
-void Octree::gatherVisibleRecursive(
-	const Frustum& frustum, U32 testId, Leaf* leaf, DynamicArrayAuto<OctreePlaceable*>& out)
+void Octree::gatherVisibleRecursive(const Frustum& frustum,
+	U32 testId,
+	OctreeNodeVisibilityTestCallback testCallback,
+	void* testCallbackUserData,
+	Leaf* leaf,
+	DynamicArrayAuto<void*>& out)
 {
 	ANKI_ASSERT(leaf);
 
@@ -272,7 +339,8 @@ void Octree::gatherVisibleRecursive(
 	{
 		if(!placeableNode.m_placeable->alreadyVisited(testId))
 		{
-			out.emplaceBack(placeableNode.m_placeable);
+			ANKI_ASSERT(placeableNode.m_placeable->m_userData);
+			out.emplaceBack(placeableNode.m_placeable->m_userData);
 		}
 	}
 
@@ -282,11 +350,18 @@ void Octree::gatherVisibleRecursive(
 	{
 		if(child)
 		{
-			aabb.setMin(Vec4(child->m_aabbMin, 0.0f));
-			aabb.setMax(Vec4(child->m_aabbMax, 0.0f));
-			if(frustum.insideFrustum(aabb))
+			aabb.setMin(child->m_aabbMin);
+			aabb.setMax(child->m_aabbMax);
+
+			Bool inside = frustum.insideFrustum(aabb);
+			if(inside && testCallback != nullptr)
+			{
+				inside = testCallback(testCallbackUserData, aabb);
+			}
+
+			if(inside)
 			{
-				gatherVisibleRecursive(frustum, testId, child, out);
+				gatherVisibleRecursive(frustum, testId, testCallback, testCallbackUserData, child, out);
 			}
 		}
 	}
@@ -334,4 +409,143 @@ void Octree::cleanupInternal()
 	}
 }
 
+/// Draw the box of a leaf and then do the same for all of its children.
+/// @param leaf The leaf to draw.
+/// @param drawer The drawer that will draw the boxes.
+void Octree::debugDrawRecursive(const Leaf& leaf, OctreeDebugDrawer& drawer) const
+{
+	// Empty leafs get a fixed dark gray. The rest get a heatmap color that depends on the number of placeables
+	const U32 count = leaf.m_placeables.getSize();
+	Vec3 color(0.25f);
+	if(count > 0)
+	{
+		color = heatmap(10.0f / count);
+	}
+
+	drawer.drawCube(Aabb(leaf.m_aabbMin, leaf.m_aabbMax), Vec4(color, 1.0f));
+
+	// Move to the children that exist
+	for(Leaf* const child : leaf.m_children)
+	{
+		if(child != nullptr)
+		{
+			debugDrawRecursive(*child, drawer);
+		}
+	}
+}
+
+/// Start a gather that runs as ThreadHive tasks. It submits a single task for the root leaf and that task spawns
+/// more tasks for the visible children.
+/// @param frustum The frustum to test against. Only the pointer is stored so it should stay alive until the tasks
+///                are done.
+/// @param testId A unique index for this test.
+/// @param testCallback Optional additional test for the box of an Octree node. Can be nullptr.
+/// @param testCallbackUserData Parameter to the testCallback. Can be nullptr.
+/// @param out The output of the tests. Only the pointer is stored so it should stay alive until the tasks are done.
+/// @param hive The hive that provides the scratch memory and the semaphore and that runs the tasks.
+/// @param waitSemaphore The semaphore the first task will wait on.
+/// @param[out] signalSemaphore A new semaphore that the gather tasks signal.
+void Octree::gatherVisibleParallel(const Frustum* frustum,
+	U32 testId,
+	OctreeNodeVisibilityTestCallback testCallback,
+	void* testCallbackUserData,
+	DynamicArrayAuto<void*>* out,
+	ThreadHive& hive,
+	ThreadHiveSemaphore* waitSemaphore,
+	ThreadHiveSemaphore*& signalSemaphore)
+{
+	ANKI_ASSERT(out && frustum);
+
+	// Create the ctx. The scratch memory is raw memory so construct the object in place. A plain cast would leave
+	// m_lock uninitialized and the tasks lock it
+	void* const ctxMem = hive.allocateScratchMemory(sizeof(GatherParallelCtx), alignof(GatherParallelCtx));
+	GatherParallelCtx* ctx = ::new(ctxMem) GatherParallelCtx();
+	ctx->m_octree = this;
+	ctx->m_frustum = frustum;
+	ctx->m_testId = testId;
+	ctx->m_testCallback = testCallback;
+	ctx->m_testCallbackUserData = testCallbackUserData;
+	ctx->m_out = out;
+
+	// Create the first test ctx, the one of the root leaf
+	void* const taskCtxMem =
+		hive.allocateScratchMemory(sizeof(GatherParallelTaskCtx), alignof(GatherParallelTaskCtx));
+	GatherParallelTaskCtx* taskCtx = ::new(taskCtxMem) GatherParallelTaskCtx();
+	taskCtx->m_ctx = ctx;
+	taskCtx->m_leaf = m_rootLeaf;
+
+	// Create signal semaphore
+	signalSemaphore = hive.newSemaphore(1);
+
+	// Fire the first task
+	ThreadHiveTask task;
+	task.m_callback = gatherVisibleTaskCallback;
+	task.m_argument = taskCtx;
+	task.m_signalSemaphore = signalSemaphore;
+	task.m_waitSemaphore = waitSemaphore;
+
+	hive.submitTasks(&task, 1);
+}
+
+/// The callback of the gather tasks. It forwards the call to the gatherVisibleParallelTask() of the octree that is
+/// stored in the task's context.
+/// @param ud A GatherParallelTaskCtx. Can't be nullptr.
+void Octree::gatherVisibleTaskCallback(void* ud, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem)
+{
+	ANKI_ASSERT(ud);
+	GatherParallelTaskCtx& self = *static_cast<GatherParallelTaskCtx*>(ud);
+	Octree& octree = *self.m_ctx->m_octree;
+	octree.gatherVisibleParallelTask(threadId, hive, sem, self);
+}
+
+/// The work of a single gather task. It appends the not yet visited placeables of the task's leaf to the output and
+/// submits one new task for every child leaf that passes the visibility tests.
+/// @param threadId Not used.
+/// @param hive Provides the scratch memory for the new task contexts and runs the new tasks.
+/// @param sem The semaphore this task signals. The new tasks signal the same one.
+/// @param taskCtx The context of this task.
+void Octree::gatherVisibleParallelTask(
+	U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem, GatherParallelTaskCtx& taskCtx)
+{
+	ANKI_ASSERT(taskCtx.m_ctx && taskCtx.m_leaf);
+	GatherParallelCtx& shared = *taskCtx.m_ctx;
+	Leaf& crntLeaf = *taskCtx.m_leaf;
+
+	// Gather the placeables of this leaf. The output is shared with the other tasks so append under the lock
+	if(crntLeaf.m_placeables.getSize() > 0)
+	{
+		LockGuard<SpinLock> guard(shared.m_lock);
+
+		for(PlaceableNode& placeableNode : crntLeaf.m_placeables)
+		{
+			if(placeableNode.m_placeable->alreadyVisited(shared.m_testId))
+			{
+				continue;
+			}
+
+			ANKI_ASSERT(placeableNode.m_placeable->m_userData);
+			shared.m_out->emplaceBack(placeableNode.m_placeable->m_userData);
+		}
+	}
+
+	// Prepare one task for every visible child
+	Array<ThreadHiveTask, 8> childTasks;
+	U childTaskCount = 0;
+	Aabb childBox;
+	for(Leaf* child : crntLeaf.m_children)
+	{
+		if(child == nullptr)
+		{
+			continue;
+		}
+
+		childBox.setMin(child->m_aabbMin);
+		childBox.setMax(child->m_aabbMax);
+
+		// Frustum test first
+		if(!shared.m_frustum->insideFrustum(childBox))
+		{
+			continue;
+		}
+
+		// Then the optional test of the user
+		if(shared.m_testCallback != nullptr && !shared.m_testCallback(shared.m_testCallbackUserData, childBox))
+		{
+			continue;
+		}
+
+		// The child is visible. Give it its own task ctx
+		GatherParallelTaskCtx* childCtx = static_cast<GatherParallelTaskCtx*>(
+			hive.allocateScratchMemory(sizeof(GatherParallelTaskCtx), alignof(GatherParallelTaskCtx)));
+		childCtx->m_ctx = &shared;
+		childCtx->m_leaf = child;
+
+		// And its own task
+		ThreadHiveTask& childTask = childTasks[childTaskCount];
+		++childTaskCount;
+		childTask.m_callback = gatherVisibleTaskCallback;
+		childTask.m_argument = childCtx;
+		childTask.m_signalSemaphore = sem;
+	}
+
+	if(childTaskCount == 0)
+	{
+		// Nothing more to do
+		return;
+	}
+
+	// The trick: increase the value of the semaphore by the number of the new tasks. That keeps blocking the tasks
+	// that depend on the gather
+	sem->increaseSemaphore(childTaskCount);
+
+	// Submit them all at once
+	hive.submitTasks(&childTasks[0], childTaskCount);
+}
+
 } // end namespace anki

+ 116 - 5
src/anki/scene/Octree.h

@@ -7,21 +7,34 @@
 
 #include <anki/scene/Common.h>
 #include <anki/Math.h>
-#include <anki/collision/Forward.h>
+#include <anki/collision/Aabb.h>
 #include <anki/util/WeakArray.h>
 #include <anki/util/Enum.h>
 #include <anki/util/ObjectAllocator.h>
 #include <anki/util/List.h>
+#include <anki/core/Trace.h>
 
 namespace anki
 {
 
 // Forward
 class OctreePlaceable;
+class ThreadHive;
+class ThreadHiveSemaphore;
 
 /// @addtogroup scene
 /// @{
 
+/// Callback to determine if an octree node is visible.
+using OctreeNodeVisibilityTestCallback = Bool (*)(void* userData, const Aabb& box);
+
+/// Octree debug drawer. It's the interface Octree::debugDraw() uses to draw the boxes of the leafs.
+class OctreeDebugDrawer
+{
+public:
+	/// Virtual because this is a polymorphic interface. It makes destroying an implementation through a pointer to
+	/// the interface well defined.
+	virtual ~OctreeDebugDrawer() = default;
+
+	/// Draw a box.
+	/// @param box The box to draw.
+	/// @param color The color of the box.
+	virtual void drawCube(const Aabb& box, const Vec4& color) = 0;
+};
+
 /// Octree for visibility tests.
 class Octree : public NonCopyable
 {
@@ -46,13 +59,57 @@ public:
 	void remove(OctreePlaceable& placeable);
 
 	/// Gather visible placeables.
+	/// @param frustum The frustum to test against.
+	/// @param testId A unique index for this test.
+	/// @param testCallback A ptr to a function that will be used to perform an additional test to the box of the
+	///                     Octree node. Can be nullptr.
+	/// @param testCallbackUserData Parameter to the testCallback. Can be nullptr.
+	/// @param out The output of the tests.
 	/// @note It's thread-safe against other gatherVisible calls.
-	void gatherVisible(const Frustum& frustum, U32 testId, DynamicArrayAuto<OctreePlaceable*>& out)
+	void gatherVisible(const Frustum& frustum,
+		U32 testId,
+		OctreeNodeVisibilityTestCallback testCallback,
+		void* testCallbackUserData,
+		DynamicArrayAuto<void*>& out)
+	{
+		gatherVisibleRecursive(frustum, testId, testCallback, testCallbackUserData, m_rootLeaf, out);
+	}
+
+	/// Similar to gatherVisible but it spawns ThreadHive tasks.
+	void gatherVisibleParallel(const Frustum* frustum,
+		U32 testId,
+		OctreeNodeVisibilityTestCallback testCallback,
+		void* testCallbackUserData,
+		DynamicArrayAuto<void*>* out,
+		ThreadHive& hive,
+		ThreadHiveSemaphore* waitSemaphore,
+		ThreadHiveSemaphore*& signalSemaphore);
+
+	/// Walk the tree.
+	/// @tparam TTestAabbFunc The lambda that will test an Aabb. Signature of lambda: Bool(*)(const Aabb& leafBox)
+	/// @tparam TNewPlaceableFunc The lambda to do something with a visible placeable.
+	///                           Signature: void(*)(void* placeableUserData).
+	/// @param testId The test index.
+	/// @param testFunc See TTestAabbFunc.
+	/// @param newPlaceableFunc See TNewPlaceableFunc.
+	template<typename TTestAabbFunc, typename TNewPlaceableFunc>
+	void walkTree(U32 testId, TTestAabbFunc testFunc, TNewPlaceableFunc newPlaceableFunc)
 	{
-		gatherVisibleRecursive(frustum, testId, m_rootLeaf, out);
+		ANKI_ASSERT(m_rootLeaf);
+		walkTreeInternal(*m_rootLeaf, testId, testFunc, newPlaceableFunc);
+	}
+
+	/// Debug draw.
+	void debugDraw(OctreeDebugDrawer& drawer) const
+	{
+		ANKI_ASSERT(m_rootLeaf);
+		debugDrawRecursive(*m_rootLeaf, drawer);
 	}
 
 private:
+	class GatherParallelCtx;
+	class GatherParallelTaskCtx;
+
 	/// List node.
 	class PlaceableNode : public IntrusiveListEnabled<PlaceableNode>
 	{
@@ -182,6 +239,8 @@ private:
 
 	void placeRecursive(const Aabb& volume, OctreePlaceable* placeable, Leaf* parent, U32 depth);
 
+	static Bool volumeTotallyInsideLeaf(const Aabb& volume, const Leaf& leaf);
+
 	static void computeChildAabb(LeafMask child,
 		const Vec3& parentAabbMin,
 		const Vec3& parentAabbMax,
@@ -192,14 +251,30 @@ private:
 	/// Remove a placeable from the tree.
 	void removeInternal(OctreePlaceable& placeable);
 
-	static void gatherVisibleRecursive(
-		const Frustum& frustum, U32 testId, Leaf* leaf, DynamicArrayAuto<OctreePlaceable*>& out);
+	static void gatherVisibleRecursive(const Frustum& frustum,
+		U32 testId,
+		OctreeNodeVisibilityTestCallback testCallback,
+		void* testCallbackUserData,
+		Leaf* leaf,
+		DynamicArrayAuto<void*>& out);
+
+	/// ThreadHive callback.
+	static void gatherVisibleTaskCallback(void* ud, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem);
+
+	void gatherVisibleParallelTask(
+		U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem, GatherParallelTaskCtx& taskCtx);
 
 	/// Remove a leaf.
 	void cleanupRecursive(Leaf* leaf, Bool& canDeleteLeafUponReturn);
 
 	/// Cleanup the tree.
 	void cleanupInternal();
+
+	/// Debug draw.
+	void debugDrawRecursive(const Leaf& leaf, OctreeDebugDrawer& drawer) const;
+
+	template<typename TTestAabbFunc, typename TNewPlaceableFunc>
+	void walkTreeInternal(Leaf& leaf, U32 testId, TTestAabbFunc testFunc, TNewPlaceableFunc newPlaceableFunc);
 };
 
 /// An entity that can be placed in octrees.
@@ -208,6 +283,8 @@ class OctreePlaceable : public NonCopyable
 	friend class Octree;
 
 public:
+	void* m_userData = nullptr;
+
 	void reset()
 	{
 		m_visitedMask.set(0);
@@ -221,11 +298,45 @@ private:
 	/// @note It's thread-safe.
 	Bool alreadyVisited(U32 testId)
 	{
+		ANKI_ASSERT(testId < 64);
 		const U64 testMask = U64(1u) << U64(testId);
 		const U64 prev = m_visitedMask.fetchOr(testMask);
 		return !!(testMask & prev);
 	}
 };
+
+/// The recursive part of walkTree(). It gives the user data of the not yet visited placeables of the leaf to
+/// newPlaceableFunc and then descends into the children whose box passes testFunc.
+template<typename TTestAabbFunc, typename TNewPlaceableFunc>
+inline void Octree::walkTreeInternal(Leaf& leaf, U32 testId, TTestAabbFunc testFunc, TNewPlaceableFunc newPlaceableFunc)
+{
+	// First the placeables of this leaf
+	for(PlaceableNode& placeableNode : leaf.m_placeables)
+	{
+		if(placeableNode.m_placeable->alreadyVisited(testId))
+		{
+			continue;
+		}
+
+		ANKI_ASSERT(placeableNode.m_placeable->m_userData);
+		newPlaceableFunc(placeableNode.m_placeable->m_userData);
+	}
+
+	// Then the children. Only the counter macro reads visibleLeafs, the void cast silences the unused warning
+	U visibleLeafs = 0;
+	(void)visibleLeafs;
+	Aabb childBox;
+	for(Leaf* child : leaf.m_children)
+	{
+		if(child == nullptr)
+		{
+			continue;
+		}
+
+		childBox.setMin(child->m_aabbMin);
+		childBox.setMax(child->m_aabbMax);
+		if(!testFunc(childBox))
+		{
+			continue;
+		}
+
+		++visibleLeafs;
+		walkTreeInternal(*child, testId, testFunc, newPlaceableFunc);
+	}
+
+	ANKI_TRACE_INC_COUNTER(OCTREE_VISIBLE_LEAFS, visibleLeafs);
+}
 /// @}
 
 } // end namespace anki

+ 10 - 1
src/anki/scene/SceneGraph.cpp

@@ -7,6 +7,7 @@
 #include <anki/scene/CameraNode.h>
 #include <anki/scene/ModelNode.h>
 #include <anki/scene/SectorNode.h>
+#include <anki/scene/Octree.h>
 #include <anki/core/Trace.h>
 #include <anki/physics/PhysicsWorld.h>
 #include <anki/resource/ResourceManager.h>
@@ -61,6 +62,11 @@ SceneGraph::~SceneGraph()
 		m_alloc.deleteInstance(m_sectors);
 		m_sectors = nullptr;
 	}
+
+	if(m_octree)
+	{
+		m_alloc.deleteInstance(m_octree);
+	}
 }
 
 Error SceneGraph::init(AllocAlignedCallback allocCb,
@@ -94,6 +100,9 @@ Error SceneGraph::init(AllocAlignedCallback allocCb,
 
 	m_maxReflectionProxyDistance = config.getNumber("scene.imageReflectionMaxDistance");
 
+	m_octree = m_alloc.newInstance<Octree>(m_alloc);
+	m_octree->init(Vec3(-1000.0f), Vec3(1000.0f), 5); // TODO
+
 	// Init the default main camera
 	ANKI_CHECK(newSceneNode<PerspectiveCameraNode>("mainCamera", m_defaultMainCam));
 	m_defaultMainCam->setAll(toRad(60.0f), toRad(60.0f), 0.1f, 1000.0f);
@@ -245,7 +254,7 @@ Error SceneGraph::update(Second prevUpdateTime, Second crntTime)
 void SceneGraph::doVisibilityTests(RenderQueue& rqueue)
 {
 	m_stats.m_visibilityTestsTime = HighRezTimer::getCurrentTime();
-	anki::doVisibilityTests(*m_mainCam, *this, rqueue);
+	doVisibilityTests(*m_mainCam, *this, rqueue);
 	m_stats.m_visibilityTestsTime = HighRezTimer::getCurrentTime() - m_stats.m_visibilityTestsTime;
 }
 

+ 12 - 1
src/anki/scene/SceneGraph.h

@@ -7,7 +7,6 @@
 
 #include <anki/scene/Common.h>
 #include <anki/scene/SceneNode.h>
-#include <anki/scene/Visibility.h>
 #include <anki/Math.h>
 #include <anki/util/Singleton.h>
 #include <anki/util/HighRezTimer.h>
@@ -27,6 +26,7 @@ class SectorGroup;
 class ConfigSet;
 class PerspectiveCameraNode;
 class UpdateSceneNodesCtx;
+class Octree;
 
 /// @addtogroup scene
 /// @{
@@ -227,6 +227,12 @@ anki_internal:
 		return m_earlyZDist;
 	}
 
+	Octree& getOctree() // Get the octree of the scene. SceneGraph::init() creates it, thus the assertion.
+	{
+		ANKI_ASSERT(m_octree);
+		return *m_octree;
+	}
+
 private:
 	const Timestamp* m_globalTimestamp = nullptr;
 	Timestamp m_timestamp = 0; ///< Cached timestamp
@@ -254,6 +260,8 @@ private:
 	EventManager m_events;
 	SectorGroup* m_sectors;
 
+	Octree* m_octree = nullptr;
+
 	Atomic<U32> m_objectsMarkedForDeletionCount;
 
 	F32 m_maxReflectionProxyDistance = 0.0;
@@ -275,6 +283,9 @@ private:
 
 	ANKI_USE_RESULT Error updateNodes(UpdateSceneNodesCtx& ctx) const;
 	ANKI_USE_RESULT static Error updateNode(Second prevTime, Second crntTime, SceneNode& node);
+
+	/// Do visibility tests.
+	static void doVisibilityTests(SceneNode& frustumable, SceneGraph& scene, RenderQueue& rqueue);
 };
 
 template<typename Node, typename... Args>

+ 59 - 16
src/anki/scene/SoftwareRasterizer.cpp

@@ -287,9 +287,12 @@ void SoftwareRasterizer::rasterizeTriangle(const Vec4* tri)
 				const F32 z1 = ndc[1].z();
 				const F32 z2 = ndc[2].z();
 
-				const F32 depth = z0 * bc[0] + z1 * bc[1] + z2 * bc[2];
+				F32 depth = z0 * bc[0] + z1 * bc[1] + z2 * bc[2];
 				ANKI_ASSERT(depth >= 0.0 && depth <= 1.0);
 
+				// Clamp it to a bit less than 1.0f because 1.0f will produce a 0 depthi
+				depth = min(depth, 1.0f - EPSILON);
+
 				// Store the min of the current value and new one
 				const U32 depthi = depth * MAX_U32;
 				m_zbuffer[U(y) * m_width + U(x)].min(depthi);
@@ -321,36 +324,59 @@ Bool SoftwareRasterizer::visibilityTestInternal(const CollisionShape& cs, const
 	boxPoints[6] = Vec4(maxv.x(), minv.y(), minv.z(), 1.0f);
 	boxPoints[7] = Vec4(maxv.x(), maxv.y(), minv.z(), 1.0f);
 
-	// Compute bounding box
-	const Vec2 windowSize(m_width, m_height);
-
-	Vec2 bboxMin(MAX_F32), bboxMax(MIN_F32);
-	F32 minZ = MAX_F32;
+	// Transform points
 	for(Vec4& p : boxPoints)
 	{
 		p = m_mvp * p;
+	}
+
+	// Check if a point touches the near plane
+	for(const Vec4& p : boxPoints)
+	{
 		if(p.w() <= 0.0f)
 		{
 			// Don't bother clipping. Just mark it as visible.
 			return true;
 		}
+	}
 
-		p = p.perspectiveDivide();
+	// Compute the min and max bounds
+	Vec4 bboxMin(MAX_F32);
+	Vec4 bboxMax(MIN_F32);
+	for(Vec4& p : boxPoints)
+	{
+		// Perspective divide
+		p /= p.w();
 
-		for(U i = 0; i < 2; ++i)
-		{
-			F32 a = (p[i] / 2.0f + 0.5f) * windowSize[i];
+		// To [0, 1]
+		p *= Vec4(0.5f, 0.5f, 1.0f, 1.0f);
+		p += Vec4(0.5f, 0.5f, 0.0f, 0.0f);
 
-			bboxMin[i] = min(bboxMin[i], floorf(a));
-			bboxMin[i] = clamp(bboxMin[i], 0.0f, windowSize[i]);
+		// To [0, m_width|m_height]
+		p *= Vec4(m_width, m_height, 1.0f, 1.0f);
 
-			bboxMax[i] = max(bboxMax[i], ceilf(a));
-			bboxMax[i] = clamp(bboxMax[i], 0.0f, windowSize[i]);
-		}
+		// Min
+		bboxMin = bboxMin.min(p);
 
-		minZ = min(minZ, p.z());
+		// Max
+		bboxMax = bboxMax.max(p);
 	}
 
+	// Fix the bounds
+	bboxMin.x() = floorf(bboxMin.x());
+	bboxMin.x() = clamp(bboxMin.x(), 0.0f, F32(m_width));
+
+	bboxMax.x() = ceilf(bboxMax.x());
+	bboxMax.x() = clamp(bboxMax.x(), 0.0f, F32(m_width));
+
+	bboxMin.y() = floorf(bboxMin.y());
+	bboxMin.y() = clamp(bboxMin.y(), 0.0f, F32(m_height));
+
+	bboxMax.y() = ceilf(bboxMax.y());
+	bboxMax.y() = clamp(bboxMax.y(), 0.0f, F32(m_height));
+
+	// Loop the tiles
+	F32 minZ = bboxMin.z();
 	for(U y = bboxMin.y(); y < bboxMax.y(); y += 1.0f)
 	{
 		for(U x = bboxMin.x(); x < bboxMax.x(); x += 1.0f)
@@ -368,4 +394,21 @@ Bool SoftwareRasterizer::visibilityTestInternal(const CollisionShape& cs, const
 	return false;
 }
 
+/// Overwrite the whole depth buffer with the given values.
+/// @param depthValues One depth value in [0.0, 1.0] for every element of the depth buffer.
+void SoftwareRasterizer::fillDepthBuffer(ConstWeakArray<F32> depthValues)
+{
+	ANKI_ASSERT(m_zbuffer.getSize() == depthValues.getSize());
+
+	// Go from the last element to the first
+	for(U idx = depthValues.getSize(); idx-- > 0;)
+	{
+		F32 depth = depthValues[idx];
+		ANKI_ASSERT(depth >= 0.0f && depth <= 1.0f);
+
+		// Keep it a bit less than 1.0f, just like rasterizeTriangle() does, because 1.0f will produce a 0 when
+		// converted to integer
+		depth = min(depth, 1.0f - EPSILON);
+
+		m_zbuffer[idx].set(U32(depth * MAX_U32));
+	}
+}
+
 } // end namespace anki

+ 5 - 0
src/anki/scene/SoftwareRasterizer.h

@@ -8,6 +8,7 @@
 #include <anki/scene/Common.h>
 #include <anki/Math.h>
 #include <anki/collision/Plane.h>
+#include <anki/util/WeakArray.h>
 
 namespace anki
 {
@@ -42,8 +43,12 @@ public:
 	/// @param vertCount The number of verts to draw.
 	/// @param stride The stride (in bytes) of the next vertex.
 	/// @param backfaceCulling If true it will do backface culling.
+	/// @note It's thread-safe against other draw() invocations only.
 	void draw(const F32* verts, U vertCount, U stride, Bool backfaceCulling);
 
+	/// Fill the depth buffer with some values.
+	void fillDepthBuffer(ConstWeakArray<F32> depthValues);
+
 	/// Perform visibility tests.
 	/// @param cs The collision shape in world space.
 	/// @param aabb The Aabb in of the cs in world space.

+ 180 - 188
src/anki/scene/Visibility.cpp

@@ -3,7 +3,6 @@
 // Code licensed under the BSD License.
 // http://www.anki3d.org/LICENSE
 
-#include <anki/scene/Visibility.h>
 #include <anki/scene/VisibilityInternal.h>
 #include <anki/scene/SceneGraph.h>
 #include <anki/scene/SectorNode.h>
@@ -24,8 +23,10 @@
 namespace anki
 {
 
-void VisibilityContext::submitNewWork(FrustumComponent& frc, RenderQueue& rqueue, ThreadHive& hive)
+void VisibilityContext::submitNewWork(const FrustumComponent& frc, RenderQueue& rqueue, ThreadHive& hive)
 {
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_SUBMIT_WORK);
+
 	// Check enabled and make sure that the results are null (this can happen on multiple on circular viewing)
 	if(ANKI_UNLIKELY(!frc.anyVisibilityTestEnabled()))
 	{
@@ -58,171 +59,158 @@ void VisibilityContext::submitNewWork(FrustumComponent& frc, RenderQueue& rqueue
 		m_testedFrcs.pushBack(alloc, &frc);
 	}
 
+	// Prepare the ctx
+	FrustumVisibilityContext* frcCtx = alloc.newInstance<FrustumVisibilityContext>();
+	frcCtx->m_visCtx = this;
+	frcCtx->m_frc = &frc;
+	frcCtx->m_queueViews.create(alloc, hive.getThreadCount());
+	frcCtx->m_visTestsSignalSem = hive.newSemaphore(1);
+	frcCtx->m_renderQueue = &rqueue;
+
 	// Submit new work
 	//
 
-	// Software rasterizer tasks
-	SoftwareRasterizer* r = nullptr;
-	Array<ThreadHiveDependencyHandle, ThreadHive::MAX_THREADS> rasterizeDeps;
-	if(frc.visibilityTestsEnabled(FrustumComponentVisibilityTestFlag::OCCLUDERS))
+	// Software rasterizer task
+	ThreadHiveSemaphore* prepareRasterizerSem = nullptr;
+	if(frc.visibilityTestsEnabled(FrustumComponentVisibilityTestFlag::OCCLUDERS) && frc.hasCoverageBuffer())
 	{
 		// Fill the rasterizer's depth buffer from the coverage buffer
-		GatherVisibleTrianglesTask* gather = alloc.newInstance<GatherVisibleTrianglesTask>();
-		gather->m_visCtx = this;
-		gather->m_frc = &frc;
-		gather->m_vertCount = 0;
-
-		r = &gather->m_r;
+		ThreadHiveTask fillDepthTask;
+		fillDepthTask.m_callback = FillRasterizerWithCoverageTask::callback;
+		fillDepthTask.m_argument = alloc.newInstance<FillRasterizerWithCoverageTask>(frcCtx);
+		fillDepthTask.m_signalSemaphore = hive.newSemaphore(1);
 
-		ThreadHiveTask gatherTask;
-		gatherTask.m_callback = GatherVisibleTrianglesTask::callback;
-		gatherTask.m_argument = gather;
+		hive.submitTasks(&fillDepthTask, 1);
 
-		hive.submitTasks(&gatherTask, 1);
-
-		// Rasterize triangles task
-		U count = hive.getThreadCount();
-		RasterizeTrianglesTask* rasterize = alloc.newArray<RasterizeTrianglesTask>(count);
-
-		Array<ThreadHiveTask, ThreadHive::MAX_THREADS> rastTasks;
-		while(count--)
-		{
-			RasterizeTrianglesTask& rast = rasterize[count];
-			rast.m_gatherTask = gather;
-			rast.m_taskIdx = count;
-			rast.m_taskCount = hive.getThreadCount();
-
-			rastTasks[count].m_callback = RasterizeTrianglesTask::callback;
-			rastTasks[count].m_argument = &rast;
-			rastTasks[count].m_inDependencies = WeakArray<ThreadHiveDependencyHandle>(&gatherTask.m_outDependency, 1);
-		}
-
-		count = hive.getThreadCount();
-		hive.submitTasks(&rastTasks[0], count);
-		while(count--)
-		{
-			rasterizeDeps[count] = rastTasks[count].m_outDependency;
-		}
+		prepareRasterizerSem = fillDepthTask.m_signalSemaphore;
 	}
 
-	// Gather visibles from sector
-	GatherVisiblesFromSectorsTask* gather = alloc.newInstance<GatherVisiblesFromSectorsTask>();
-	gather->m_visCtx = this;
-	gather->m_frc = &frc;
-	gather->m_r = r;
-
-	ThreadHiveTask gatherTask;
-	gatherTask.m_callback = GatherVisiblesFromSectorsTask::callback;
-	gatherTask.m_argument = gather;
-	if(r)
+	if(frc.visibilityTestsEnabled(FrustumComponentVisibilityTestFlag::OCCLUDERS))
 	{
-		gatherTask.m_inDependencies = WeakArray<ThreadHiveDependencyHandle>(&rasterizeDeps[0], hive.getThreadCount());
+		rqueue.m_fillCoverageBufferCallback = FrustumComponent::fillCoverageBufferCallback;
+		rqueue.m_fillCoverageBufferCallbackUserData = static_cast<void*>(const_cast<FrustumComponent*>(&frc));
 	}
 
-	hive.submitTasks(&gatherTask, 1);
-
-	// Test tasks
-	U testCount = hive.getThreadCount();
-	WeakArray<VisibilityTestTask> tests(alloc.newArray<VisibilityTestTask>(testCount), testCount);
-	WeakArray<ThreadHiveTask> testTasks(alloc.newArray<ThreadHiveTask>(testCount), testCount);
-
-	for(U i = 0; i < testCount; ++i)
-	{
-		auto& test = tests[i];
-		test.m_visCtx = this;
-		test.m_frc = &frc;
-		test.m_sectorsCtx = &gather->m_sectorsCtx;
-		test.m_taskIdx = i;
-		test.m_taskCount = testCount;
-		test.m_r = r;
-
-		auto& task = testTasks[i];
-		task.m_callback = VisibilityTestTask::callback;
-		task.m_argument = &test;
-		task.m_inDependencies = WeakArray<ThreadHiveDependencyHandle>(&gatherTask.m_outDependency, 1);
-	}
+	// Gather visibles from the octree
+	ThreadHiveTask gatherTask;
+	gatherTask.m_callback = GatherVisiblesFromOctreeTask::callback;
+	gatherTask.m_argument = alloc.newInstance<GatherVisiblesFromOctreeTask>(frcCtx);
+	gatherTask.m_signalSemaphore = nullptr; // No need to signal anything because it will spawn new tasks
+	gatherTask.m_waitSemaphore = prepareRasterizerSem;
 
-	hive.submitTasks(&testTasks[0], testCount);
+	hive.submitTasks(&gatherTask, 1);
 
 	// Combine results task
-	CombineResultsTask* combine = alloc.newInstance<CombineResultsTask>();
-	combine->m_visCtx = this;
-	combine->m_frc = &frc;
-	combine->m_results = &rqueue;
-	combine->m_tests = tests;
-	combine->m_swRast = r;
-
 	ThreadHiveTask combineTask;
 	combineTask.m_callback = CombineResultsTask::callback;
-	combineTask.m_argument = combine;
-	combineTask.m_inDependencies =
-		WeakArray<ThreadHiveDependencyHandle>(alloc.newArray<ThreadHiveDependencyHandle>(testCount), testCount);
-	for(U i = 0; i < testCount; ++i)
-	{
-		combineTask.m_inDependencies[i] = testTasks[i].m_outDependency;
-	}
+	combineTask.m_argument = alloc.newInstance<CombineResultsTask>(frcCtx);
+	ANKI_ASSERT(frcCtx->m_visTestsSignalSem);
+	combineTask.m_waitSemaphore = frcCtx->m_visTestsSignalSem;
 
 	hive.submitTasks(&combineTask, 1);
 }
 
-void GatherVisibleTrianglesTask::gather()
+void FillRasterizerWithCoverageTask::fill() // Creates m_frcCtx->m_r and seeds its depth buffer from the frustum component's coverage buffer
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_GATHER_TRIANGLES);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_FILL_DEPTH);
 
-	auto alloc = m_visCtx->m_scene->getFrameAllocator();
-	m_verts.create(alloc, TRIANGLES_INITIAL_SIZE);
-	SceneComponentLists& lists = m_visCtx->m_scene->getSceneComponentLists();
+	auto alloc = m_frcCtx->m_visCtx->m_scene->getFrameAllocator();
 
-	ANKI_ASSERT(m_vertCount == 0);
-	lists.iterateComponents<OccluderComponent>([&](OccluderComponent& comp) {
-		if(m_frc->insideFrustum(comp.getBoundingVolume()))
-		{
-			U32 count, stride;
-			const Vec3* it;
-			comp.getVertices(it, count, stride);
-			while(count--)
+	// Get the C-Buffer (coverage buffer) of the frustum component being tested
+	ConstWeakArray<F32> depthBuff;
+	U32 width;
+	U32 height;
+	m_frcCtx->m_frc->getCoverageBufferInfo(depthBuff, width, height);
+	ANKI_ASSERT(width > 0 && height > 0 && depthBuff.getSize() > 0); // submitNewWork() only submits this task when hasCoverageBuffer() is true
+
+	// Init the rasterizer. It is allocated from the frame allocator; CombineResultsTask::combine() calls its destructor explicitly
+	m_frcCtx->m_r = alloc.newInstance<SoftwareRasterizer>();
+	m_frcCtx->m_r->init(alloc);
+	m_frcCtx->m_r->prepare(m_frcCtx->m_frc->getViewMatrix(), m_frcCtx->m_frc->getProjectionMatrix(), width, height);
+
+	// Do the work: hand the coverage depth values to the rasterizer's depth buffer
+	m_frcCtx->m_r->fillDepthBuffer(depthBuff);
+}
+
+void GatherVisiblesFromOctreeTask::gather(ThreadHive& hive, ThreadHiveSemaphore& sem) // Walks the octree, batches the spatials it reports and submits a VisibilityTestTask per batch
+{
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_OCTREE);
+
+	U testIdx = m_frcCtx->m_visCtx->m_testsCount.fetchAdd(1);
+
+	// Walk the tree. The first callback decides if a box is visible, the second receives the user data of each placeable
+	m_frcCtx->m_visCtx->m_scene->getOctree().walkTree(testIdx,
+		[&](const Aabb& box) {
+			Bool visible = m_frcCtx->m_frc->insideFrustum(box);
+			if(visible && m_frcCtx->m_r) // m_r is only non-null when FillRasterizerWithCoverageTask created it
 			{
-				// Grow the array
-				if(m_vertCount + 1 > m_verts.getSize())
-				{
-					m_verts.resize(alloc, m_verts.getSize() * 2);
-				}
+				visible = m_frcCtx->m_r->visibilityTest(box, box); // The box is passed both as the collision shape and as its Aabb
+			}
 
-				m_verts[m_vertCount++] = *it;
+			return visible;
+		},
+		[&](void* placeableUserData) {
+			ANKI_ASSERT(placeableUserData);
+			SpatialComponent* scomp = static_cast<SpatialComponent*>(placeableUserData);
 
-				it = reinterpret_cast<const Vec3*>(reinterpret_cast<const U8*>(it) + stride);
+			ANKI_ASSERT(m_spatialCount < m_spatials.getSize());
+
+			m_spatials[m_spatialCount++] = scomp;
+
+			if(m_spatialCount == m_spatials.getSize()) // Batch is full, hand it over to a new VisibilityTestTask
+			{
+				flush(hive, sem);
 			}
-		}
-	});
+		});
+
+	// Flush the remaining (flush() does nothing when the batch is empty)
+	flush(hive, sem);
 
-	m_r.init(alloc);
-	m_r.prepare(m_frc->getViewMatrix(), m_frc->getProjectionMatrix(), 80, 50);
+	// Fire an additional dummy task to decrease the semaphore to zero (presumably balances the initial value of 1 given in submitNewWork() - TODO confirm)
+	ThreadHiveTask task;
+	task.m_callback = dummyCallback;
+	task.m_argument = nullptr;
+	task.m_signalSemaphore = m_frcCtx->m_visTestsSignalSem;
+	hive.submitTasks(&task, 1);
 }
 
-void RasterizeTrianglesTask::rasterize()
+void GatherVisiblesFromOctreeTask::flush(ThreadHive& hive, ThreadHiveSemaphore& sem) // Submits the batched m_spatials as one VisibilityTestTask; sem is not used in this body
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_RASTERIZE);
+	if(m_spatialCount)
+	{
+		// Create the task and copy the first m_spatialCount batched pointers into it
+		VisibilityTestTask* vis =
+			m_frcCtx->m_visCtx->m_scene->getFrameAllocator().newInstance<VisibilityTestTask>(m_frcCtx);
+		memcpy(&vis->m_spatialsToTest[0], &m_spatials[0], sizeof(m_spatials[0]) * m_spatialCount);
+		vis->m_spatialToTestCount = m_spatialCount;
 
-	const U totalVertCount = m_gatherTask->m_vertCount;
+		// Increase the semaphore to block the CombineResultsTask (which waits on m_visTestsSignalSem)
+		m_frcCtx->m_visTestsSignalSem->increaseSemaphore(1);
 
-	U32 idx;
-	while((idx = m_gatherTask->m_rasterizedVertCount.fetchAdd(3)) < totalVertCount)
-	{
-		m_gatherTask->m_r.draw(&m_gatherTask->m_verts[idx][0], 3, sizeof(Vec3), false);
+		// Submit task. It signals the same semaphore that was increased above
+		ThreadHiveTask task;
+		task.m_callback = VisibilityTestTask::callback;
+		task.m_argument = vis;
+		task.m_signalSemaphore = m_frcCtx->m_visTestsSignalSem;
+		hive.submitTasks(&task, 1);
+
+		// Clear count so that the batch can be refilled
+		m_spatialCount = 0;
 	}
 }
 
-void VisibilityTestTask::test(ThreadHive& hive)
+void VisibilityTestTask::test(ThreadHive& hive, U32 taskId)
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_TEST);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_TEST);
 
-	FrustumComponent& testedFrc = *m_frc;
+	const FrustumComponent& testedFrc = *m_frcCtx->m_frc;
 	ANKI_ASSERT(testedFrc.anyVisibilityTestEnabled());
 
-	SceneNode& testedNode = testedFrc.getSceneNode();
-	auto alloc = m_visCtx->m_scene->getFrameAllocator();
+	const SceneNode& testedNode = testedFrc.getSceneNode();
+	auto alloc = m_frcCtx->m_visCtx->m_scene->getFrameAllocator();
 
-	m_timestamp = testedNode.getComponentMaxTimestamp();
+	Timestamp& timestamp = m_frcCtx->m_queueViews[taskId].m_timestamp;
+	timestamp = testedNode.getComponentMaxTimestamp();
 
 	const Bool wantsRenderComponents =
 		testedFrc.visibilityTestsEnabled(FrustumComponentVisibilityTestFlag::RENDER_COMPONENTS);
@@ -244,24 +232,27 @@ void VisibilityTestTask::test(ThreadHive& hive)
 
 	const Bool wantsDecals = testedFrc.visibilityTestsEnabled(FrustumComponentVisibilityTestFlag::DECALS);
 
-	const Bool wantsEarlyZ =
-		testedFrc.visibilityTestsEnabled(FrustumComponentVisibilityTestFlag::EARLY_Z) && m_visCtx->m_earlyZDist > 0.0f;
+	const Bool wantsEarlyZ = testedFrc.visibilityTestsEnabled(FrustumComponentVisibilityTestFlag::EARLY_Z)
+							 && m_frcCtx->m_visCtx->m_earlyZDist > 0.0f;
 
-	// Chose the test range and a few other things
-	PtrSize start, end;
-	ThreadPoolTask::choseStartEnd(m_taskIdx, m_taskCount, m_sectorsCtx->getVisibleSceneNodeCount(), start, end);
+	// Iterate
+	RenderQueueView& result = m_frcCtx->m_queueViews[taskId];
+	for(U i = 0; i < m_spatialToTestCount; ++i)
+	{
+		const SpatialComponent* spatialC = m_spatialsToTest[i];
+		ANKI_ASSERT(spatialC);
+		const SceneNode& node = spatialC->getSceneNode();
 
-	m_sectorsCtx->iterateVisibleSceneNodes(start, end, [&](SceneNode& node) {
 		// Skip if it is the same
 		if(ANKI_UNLIKELY(&testedNode == &node))
 		{
-			return;
+			continue;
 		}
 
 		// Check what components the frustum needs
 		Bool wantNode = false;
 
-		RenderComponent* rc = node.tryGetComponent<RenderComponent>();
+		const RenderComponent* rc = node.tryGetComponent<RenderComponent>();
 		if(rc && wantsRenderComponents)
 		{
 			wantNode = true;
@@ -272,31 +263,31 @@ void VisibilityTestTask::test(ThreadHive& hive)
 			wantNode = true;
 		}
 
-		LightComponent* lc = node.tryGetComponent<LightComponent>();
+		const LightComponent* lc = node.tryGetComponent<LightComponent>();
 		if(lc && wantsLightComponents)
 		{
 			wantNode = true;
 		}
 
-		LensFlareComponent* lfc = node.tryGetComponent<LensFlareComponent>();
+		const LensFlareComponent* lfc = node.tryGetComponent<LensFlareComponent>();
 		if(lfc && wantsFlareComponents)
 		{
 			wantNode = true;
 		}
 
-		ReflectionProbeComponent* reflc = node.tryGetComponent<ReflectionProbeComponent>();
+		const ReflectionProbeComponent* reflc = node.tryGetComponent<ReflectionProbeComponent>();
 		if(reflc && wantsReflectionProbes)
 		{
 			wantNode = true;
 		}
 
-		ReflectionProxyComponent* proxyc = node.tryGetComponent<ReflectionProxyComponent>();
+		const ReflectionProxyComponent* proxyc = node.tryGetComponent<ReflectionProxyComponent>();
 		if(proxyc && wantsReflectionProxies)
 		{
 			wantNode = true;
 		}
 
-		DecalComponent* decalc = node.tryGetComponent<DecalComponent>();
+		const DecalComponent* decalc = node.tryGetComponent<DecalComponent>();
 		if(decalc && wantsDecals)
 		{
 			wantNode = true;
@@ -305,7 +296,7 @@ void VisibilityTestTask::test(ThreadHive& hive)
 		if(ANKI_UNLIKELY(!wantNode))
 		{
 			// Skip node
-			return;
+			continue;
 		}
 
 		// Test all spatial components of that node
@@ -325,8 +316,6 @@ void VisibilityTestTask::test(ThreadHive& hive)
 				// Inside
 				ANKI_ASSERT(spIdx < MAX_U8);
 				sps[count++] = SpatialTemp{&sp, static_cast<U8>(spIdx), sp.getSpatialOrigin()};
-
-				sp.setVisibleByCamera(true);
 			}
 
 			++spIdx;
@@ -337,7 +326,7 @@ void VisibilityTestTask::test(ThreadHive& hive)
 
 		if(ANKI_UNLIKELY(count == 0))
 		{
-			return;
+			continue;
 		}
 
 		ANKI_ASSERT(count == 1 && "TODO: Support sub-spatials");
@@ -363,11 +352,11 @@ void VisibilityTestTask::test(ThreadHive& hive)
 				RenderableQueueElement* el;
 				if(rc->getMaterial().isForwardShading())
 				{
-					el = m_result.m_forwardShadingRenderables.newElement(alloc);
+					el = result.m_forwardShadingRenderables.newElement(alloc);
 				}
 				else
 				{
-					el = m_result.m_renderables.newElement(alloc);
+					el = result.m_renderables.newElement(alloc);
 				}
 
 				rc->setupRenderableQueueElement(*el);
@@ -376,10 +365,10 @@ void VisibilityTestTask::test(ThreadHive& hive)
 				const Plane& nearPlane = testedFrc.getFrustum().getPlanesWorldSpace()[FrustumPlaneType::NEAR];
 				el->m_distanceFromCamera = max(0.0f, sps[0].m_sp->getAabb().testPlane(nearPlane));
 
-				if(wantsEarlyZ && el->m_distanceFromCamera < m_visCtx->m_earlyZDist
+				if(wantsEarlyZ && el->m_distanceFromCamera < m_frcCtx->m_visCtx->m_earlyZDist
 					&& !rc->getMaterial().isForwardShading())
 				{
-					RenderableQueueElement* el2 = m_result.m_earlyZRenderables.newElement(alloc);
+					RenderableQueueElement* el2 = result.m_earlyZRenderables.newElement(alloc);
 					*el2 = *el;
 				}
 			}
@@ -391,7 +380,7 @@ void VisibilityTestTask::test(ThreadHive& hive)
 			{
 			case LightComponentType::POINT:
 			{
-				PointLightQueueElement* el = m_result.m_pointLights.newElement(alloc);
+				PointLightQueueElement* el = result.m_pointLights.newElement(alloc);
 				lc->setupPointLightQueueElement(*el);
 
 				if(lc->getShadowEnabled())
@@ -406,8 +395,8 @@ void VisibilityTestTask::test(ThreadHive& hive)
 					el->m_shadowRenderQueues[4] = &nextQueues[4];
 					el->m_shadowRenderQueues[5] = &nextQueues[5];
 
-					U32* p = m_result.m_shadowPointLights.newElement(alloc);
-					*p = m_result.m_pointLights.m_elementCount - 1;
+					U32* p = result.m_shadowPointLights.newElement(alloc);
+					*p = result.m_pointLights.m_elementCount - 1;
 				}
 				else
 				{
@@ -418,7 +407,7 @@ void VisibilityTestTask::test(ThreadHive& hive)
 			}
 			case LightComponentType::SPOT:
 			{
-				SpotLightQueueElement* el = m_result.m_spotLights.newElement(alloc);
+				SpotLightQueueElement* el = result.m_spotLights.newElement(alloc);
 				lc->setupSpotLightQueueElement(*el);
 
 				if(lc->getShadowEnabled())
@@ -427,8 +416,8 @@ void VisibilityTestTask::test(ThreadHive& hive)
 					nextQueues = WeakArray<RenderQueue>(a, 1);
 					el->m_shadowRenderQueue = a;
 
-					U32* p = m_result.m_shadowSpotLights.newElement(alloc);
-					*p = m_result.m_spotLights.m_elementCount - 1;
+					U32* p = result.m_shadowSpotLights.newElement(alloc);
+					*p = result.m_spotLights.m_elementCount - 1;
 				}
 				else
 				{
@@ -444,13 +433,13 @@ void VisibilityTestTask::test(ThreadHive& hive)
 
 		if(lfc && wantsFlareComponents)
 		{
-			LensFlareQueueElement* el = m_result.m_lensFlares.newElement(alloc);
+			LensFlareQueueElement* el = result.m_lensFlares.newElement(alloc);
 			lfc->setupLensFlareQueueElement(*el);
 		}
 
 		if(reflc && wantsReflectionProbes)
 		{
-			ReflectionProbeQueueElement* el = m_result.m_reflectionProbes.newElement(alloc);
+			ReflectionProbeQueueElement* el = result.m_reflectionProbes.newElement(alloc);
 			reflc->setupReflectionProbeQueueElement(*el);
 
 			if(reflc->getMarkedForRendering())
@@ -478,7 +467,7 @@ void VisibilityTestTask::test(ThreadHive& hive)
 
 		if(decalc && wantsDecals)
 		{
-			DecalQueueElement* el = m_result.m_decals.newElement(alloc);
+			DecalQueueElement* el = result.m_decals.newElement(alloc);
 			decalc->setupDecalQueueElement(*el);
 		}
 
@@ -487,42 +476,45 @@ void VisibilityTestTask::test(ThreadHive& hive)
 		{
 			count = 0;
 			err = node.iterateComponentsOfType<FrustumComponent>([&](FrustumComponent& frc) {
-				m_visCtx->submitNewWork(frc, nextQueues[count++], hive);
+				m_frcCtx->m_visCtx->submitNewWork(frc, nextQueues[count++], hive);
 				return Error::NONE;
 			});
 			(void)err;
 		}
 
 		// Update timestamp
-		m_timestamp = max(m_timestamp, node.getComponentMaxTimestamp());
-	}); // end for
+		timestamp = max(timestamp, node.getComponentMaxTimestamp());
+	} // end for
 }
 
 void CombineResultsTask::combine()
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_COMBINE_RESULTS);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_COMBINE_RESULTS);
 
-	auto alloc = m_visCtx->m_scene->getFrameAllocator();
+	auto alloc = m_frcCtx->m_visCtx->m_scene->getFrameAllocator();
+	RenderQueue& results = *m_frcCtx->m_renderQueue;
 
-	m_results->m_shadowRenderablesLastUpdateTimestamp = 0;
-	for(U i = 0; i < m_tests.getSize(); ++i)
+	// Compute the timestamp
+	const U threadCount = m_frcCtx->m_queueViews.getSize();
+	results.m_shadowRenderablesLastUpdateTimestamp = 0;
+	for(U i = 0; i < threadCount; ++i)
 	{
-		m_results->m_shadowRenderablesLastUpdateTimestamp =
-			max(m_results->m_shadowRenderablesLastUpdateTimestamp, m_tests[i].m_timestamp);
+		results.m_shadowRenderablesLastUpdateTimestamp =
+			max(results.m_shadowRenderablesLastUpdateTimestamp, m_frcCtx->m_queueViews[i].m_timestamp);
 	}
-	ANKI_ASSERT(m_results->m_shadowRenderablesLastUpdateTimestamp);
+	ANKI_ASSERT(results.m_shadowRenderablesLastUpdateTimestamp);
 
 #define ANKI_VIS_COMBINE(t_, member_) \
 	{ \
 		Array<TRenderQueueElementStorage<t_>, 64> subStorages; \
-		for(U i = 0; i < m_tests.getSize(); ++i) \
+		for(U i = 0; i < threadCount; ++i) \
 		{ \
-			subStorages[i] = m_tests[i].m_result.member_; \
+			subStorages[i] = m_frcCtx->m_queueViews[i].member_; \
 		} \
 		combineQueueElements<t_>(alloc, \
-			WeakArray<TRenderQueueElementStorage<t_>>(&subStorages[0], m_tests.getSize()), \
+			WeakArray<TRenderQueueElementStorage<t_>>(&subStorages[0], threadCount), \
 			nullptr, \
-			m_results->member_, \
+			results.member_, \
 			nullptr); \
 	}
 
@@ -530,17 +522,17 @@ void CombineResultsTask::combine()
 	{ \
 		Array<TRenderQueueElementStorage<t_>, 64> subStorages; \
 		Array<TRenderQueueElementStorage<U32>, 64> ptrSubStorages; \
-		for(U i = 0; i < m_tests.getSize(); ++i) \
+		for(U i = 0; i < threadCount; ++i) \
 		{ \
-			subStorages[i] = m_tests[i].m_result.member_; \
-			ptrSubStorages[i] = m_tests[i].m_result.ptrMember_; \
+			subStorages[i] = m_frcCtx->m_queueViews[i].member_; \
+			ptrSubStorages[i] = m_frcCtx->m_queueViews[i].ptrMember_; \
 		} \
-		WeakArray<TRenderQueueElementStorage<U32>> arr(&ptrSubStorages[0], m_tests.getSize()); \
+		WeakArray<TRenderQueueElementStorage<U32>> arr(&ptrSubStorages[0], threadCount); \
 		combineQueueElements<t_>(alloc, \
-			WeakArray<TRenderQueueElementStorage<t_>>(&subStorages[0], m_tests.getSize()), \
+			WeakArray<TRenderQueueElementStorage<t_>>(&subStorages[0], threadCount), \
 			&arr, \
-			m_results->member_, \
-			&m_results->ptrMember_); \
+			results.member_, \
+			&results.ptrMember_); \
 	}
 
 	ANKI_VIS_COMBINE(RenderableQueueElement, m_renderables);
@@ -556,32 +548,32 @@ void CombineResultsTask::combine()
 #undef ANKI_VIS_COMBINE_AND_PTR
 
 #if ANKI_EXTRA_CHECKS
-	for(PointLightQueueElement* light : m_results->m_shadowPointLights)
+	for(PointLightQueueElement* light : results.m_shadowPointLights)
 	{
 		ANKI_ASSERT(light->hasShadow());
 	}
 
-	for(SpotLightQueueElement* light : m_results->m_shadowSpotLights)
+	for(SpotLightQueueElement* light : results.m_shadowSpotLights)
 	{
 		ANKI_ASSERT(light->hasShadow());
 	}
 #endif
 
 	// Sort some of the arrays
-	std::sort(
-		m_results->m_renderables.getBegin(), m_results->m_renderables.getEnd(), MaterialDistanceSortFunctor(20.0f));
+	std::sort(results.m_renderables.getBegin(), results.m_renderables.getEnd(), MaterialDistanceSortFunctor(20.0f));
 
-	std::sort(m_results->m_earlyZRenderables.getBegin(),
-		m_results->m_earlyZRenderables.getEnd(),
+	std::sort(results.m_earlyZRenderables.getBegin(),
+		results.m_earlyZRenderables.getEnd(),
 		DistanceSortFunctor<RenderableQueueElement>());
 
-	std::sort(m_results->m_forwardShadingRenderables.getBegin(),
-		m_results->m_forwardShadingRenderables.getEnd(),
+	std::sort(results.m_forwardShadingRenderables.getBegin(),
+		results.m_forwardShadingRenderables.getEnd(),
 		RevDistanceSortFunctor<RenderableQueueElement>());
 
-	if(m_swRast)
+	// Cleanup
+	if(m_frcCtx->m_r)
 	{
-		m_swRast->~SoftwareRasterizer();
+		m_frcCtx->m_r->~SoftwareRasterizer();
 	}
 }
 
@@ -681,9 +673,9 @@ void CombineResultsTask::combineQueueElements(SceneFrameAllocator<U8>& alloc,
 	}
 }
 
-void doVisibilityTests(SceneNode& fsn, SceneGraph& scene, RenderQueue& rqueue)
+void SceneGraph::doVisibilityTests(SceneNode& fsn, SceneGraph& scene, RenderQueue& rqueue)
 {
-	ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_TESTS);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_VIS_TESTS);
 
 	ThreadHive& hive = scene.getThreadHive();
 	scene.getSectorGroup().prepareForVisibilityTests();

+ 0 - 21
src/anki/scene/Visibility.h

@@ -1,21 +0,0 @@
-// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#pragma once
-
-#include <anki/scene/Common.h>
-#include <anki/renderer/RenderQueue.h>
-
-namespace anki
-{
-
-/// @addtogroup scene
-/// @{
-
-/// Do visibility tests.
-void doVisibilityTests(SceneNode& frustumable, SceneGraph& scene, RenderQueue& rqueue);
-/// @}
-
-} // end namespace anki

+ 84 - 67
src/anki/scene/VisibilityInternal.h

@@ -5,23 +5,25 @@
 
 #pragma once
 
-#include <anki/scene/Visibility.h>
 #include <anki/scene/SectorNode.h>
 #include <anki/scene/SceneGraph.h>
 #include <anki/scene/SoftwareRasterizer.h>
+#include <anki/scene/components/FrustumComponent.h>
+#include <anki/scene/Octree.h>
 #include <anki/util/Thread.h>
 #include <anki/core/Trace.h>
+#include <anki/renderer/RenderQueue.h>
 
 namespace anki
 {
 
-// Forward
-class FrustumComponent;
-class ThreadHive;
-
 /// @addtogroup scene
 /// @{
 
+static const U32 MAX_SPATIALS_PER_VIS_TEST = 48; ///< Num of spatials to test in a single ThreadHive task.
+static const U32 SW_RASTERIZER_WIDTH = 80; ///< NOTE(review): same value as the width previously hard-coded in the rasterizer's prepare() call - confirm where it is used now.
+static const U32 SW_RASTERIZER_HEIGHT = 50; ///< NOTE(review): same value as the height previously hard-coded in the rasterizer's prepare() call - confirm where it is used now.
+
 /// Sort objects on distance
 template<typename T>
 class DistanceSortFunctor
@@ -112,6 +114,8 @@ public:
 	TRenderQueueElementStorage<ReflectionProbeQueueElement> m_reflectionProbes;
 	TRenderQueueElementStorage<LensFlareQueueElement> m_lensFlares;
 	TRenderQueueElementStorage<DecalQueueElement> m_decals;
+
+	Timestamp m_timestamp = 0;
 };
 
 static_assert(std::is_trivially_destructible<RenderQueueView>::value == true, "Should be trivially destructible");
@@ -125,127 +129,139 @@ public:
 
 	F32 m_earlyZDist = -1.0f; ///< Cache this.
 
-	List<FrustumComponent*> m_testedFrcs;
+	List<const FrustumComponent*> m_testedFrcs;
 	Mutex m_mtx;
 
-	void submitNewWork(FrustumComponent& frc, RenderQueue& result, ThreadHive& hive);
+	void submitNewWork(const FrustumComponent& frc, RenderQueue& result, ThreadHive& hive);
 };
 
-/// ThreadHive task to gather all visible triangles from the OccluderComponent.
-class GatherVisibleTrianglesTask
+/// A context for a specific test of a frustum component. It is shared by the tasks that VisibilityContext::submitNewWork() submits for that frustum component.
+/// @note Should be trivially destructible.
+class FrustumVisibilityContext
 {
 public:
-	WeakPtr<VisibilityContext> m_visCtx;
-	WeakPtr<FrustumComponent> m_frc;
+	VisibilityContext* m_visCtx = nullptr; ///< The VisibilityContext that created this context.
+	const FrustumComponent* m_frc = nullptr; ///< The frustum component that is being tested.
 
-	static const U TRIANGLES_INITIAL_SIZE = 10 * 3;
+	// S/W rasterizer members
+	SoftwareRasterizer* m_r = nullptr; ///< Stays null unless FillRasterizerWithCoverageTask creates it.
 	DynamicArray<Vec3> m_verts;
-	U32 m_vertCount;
-
-	SoftwareRasterizer m_r;
-
 	Atomic<U32> m_rasterizedVertCount = {0}; ///< NOTE(review): was consumed by RasterizeTrianglesTask, which no longer exists - confirm it is still needed.
 
-	/// Thread hive task.
-	static void callback(void* ud, U32 threadId, ThreadHive& hive)
-	{
-		GatherVisibleTrianglesTask& self = *static_cast<GatherVisibleTrianglesTask*>(ud);
-		self.gather();
-	}
+	// Visibility test members
+	DynamicArray<RenderQueueView> m_queueViews; ///< Sub results, one per hive thread. Will be combined later.
+	ThreadHiveSemaphore* m_visTestsSignalSem = nullptr; ///< Signaled by the visibility test tasks; the CombineResultsTask waits on it.
 
-private:
-	void gather();
+	// Gather results members
+	RenderQueue* m_renderQueue = nullptr; ///< Where CombineResultsTask stores the combined results.
 };
 
-/// ThreadHive task to rasterize triangles.
-class RasterizeTrianglesTask
+/// ThreadHive task to set the depth map of the S/W rasterizer from the coverage buffer of the tested frustum component.
+class FillRasterizerWithCoverageTask
 {
 public:
-	WeakPtr<GatherVisibleTrianglesTask> m_gatherTask;
-	U32 m_taskIdx;
-	U32 m_taskCount;
+	FrustumVisibilityContext* m_frcCtx = nullptr; ///< Shared per-frustum context. Asserted non-null by the constructor.
+
+	FillRasterizerWithCoverageTask(FrustumVisibilityContext* frcCtx)
+		: m_frcCtx(frcCtx)
+	{
+		ANKI_ASSERT(m_frcCtx);
+	}
 
 	/// Thread hive task. ud must point to a FillRasterizerWithCoverageTask; threadId, hive and sem are not used.
-	static void callback(void* ud, U32 threadId, ThreadHive& hive)
+	static void callback(void* ud, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem)
 	{
-		RasterizeTrianglesTask& self = *static_cast<RasterizeTrianglesTask*>(ud);
-		self.rasterize();
+		FillRasterizerWithCoverageTask& self = *static_cast<FillRasterizerWithCoverageTask*>(ud);
+		self.fill();
 	}
 
 private:
-	void rasterize();
+	void fill(); // Creates the rasterizer in m_frcCtx and fills its depth buffer
 };
+static_assert(
+	std::is_trivially_destructible<FillRasterizerWithCoverageTask>::value == true, "Should be trivially destructible");
 
-/// ThreadHive task to get visible nodes from sectors.
-class GatherVisiblesFromSectorsTask
+/// ThreadHive task to get visible nodes from the octree. It batches them and spawns VisibilityTestTask tasks to test them.
+class GatherVisiblesFromOctreeTask
 {
 public:
-	WeakPtr<VisibilityContext> m_visCtx;
-	SectorGroupVisibilityTestsContext m_sectorsCtx;
-	WeakPtr<FrustumComponent> m_frc; ///< What to test against.
-	SoftwareRasterizer* m_r;
+	FrustumVisibilityContext* m_frcCtx = nullptr; ///< Shared per-frustum context. Asserted non-null by the constructor.
+
+	GatherVisiblesFromOctreeTask(FrustumVisibilityContext* frcCtx)
+		: m_frcCtx(frcCtx)
+	{
+		ANKI_ASSERT(m_frcCtx);
+	}
 
 	/// Thread hive task. ud must point to a GatherVisiblesFromOctreeTask.
-	static void callback(void* ud, U32 threadId, ThreadHive& hive)
+	static void callback(void* ud, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem)
 	{
-		GatherVisiblesFromSectorsTask& self = *static_cast<GatherVisiblesFromSectorsTask*>(ud);
-		self.gather();
+		GatherVisiblesFromOctreeTask& self = *static_cast<GatherVisiblesFromOctreeTask*>(ud);
+		self.gather(hive, *sem); // NOTE(review): sem is dereferenced without a null check while submitNewWork() sets this task's m_signalSemaphore to nullptr - confirm what the hive passes here
 	}
 
 private:
-	void gather()
-	{
-		ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_ITERATE_SECTORS);
-		U testIdx = m_visCtx->m_testsCount.fetchAdd(1);
+	Array<SpatialComponent*, MAX_SPATIALS_PER_VIS_TEST> m_spatials; ///< Batch of spatials waiting to be handed to a VisibilityTestTask.
+	U32 m_spatialCount = 0; ///< Number of valid entries in m_spatials.
+
+	void gather(ThreadHive& hive, ThreadHiveSemaphore& sem); // Walks the octree and fills/flushes m_spatials
 
-		m_visCtx->m_scene->getSectorGroup().findVisibleNodes(*m_frc, testIdx, m_r, m_sectorsCtx);
+	/// Submit tasks to test the m_spatials. Does nothing if m_spatials is empty.
+	void flush(ThreadHive& hive, ThreadHiveSemaphore& sem);
+
+	static void dummyCallback(void* ud, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem) // Empty task body; gather() submits it only to signal m_visTestsSignalSem
+	{
 	}
 };
+static_assert(
+	std::is_trivially_destructible<GatherVisiblesFromOctreeTask>::value == true, "Should be trivially destructible");
 
 /// ThreadHive task that does the actual visibility tests on a batch of spatial components.
 class VisibilityTestTask
 {
 public:
-	WeakPtr<VisibilityContext> m_visCtx;
-	WeakPtr<FrustumComponent> m_frc;
-	WeakPtr<SectorGroupVisibilityTestsContext> m_sectorsCtx;
-	U32 m_taskIdx;
-	U32 m_taskCount;
-	RenderQueueView m_result; ///< Sub result. Will be combined later.
-	Timestamp m_timestamp = 0;
-	SoftwareRasterizer* m_r ANKI_DBG_NULLIFY;
+	FrustumVisibilityContext* m_frcCtx = nullptr; ///< Shared per-frustum context. Asserted non-null by the constructor.
+
+	Array<SpatialComponent*, MAX_SPATIALS_PER_VIS_TEST> m_spatialsToTest; ///< The batch to test. Filled by GatherVisiblesFromOctreeTask::flush().
+	U32 m_spatialToTestCount = 0; ///< Number of valid entries in m_spatialsToTest.
+
+	VisibilityTestTask(FrustumVisibilityContext* frcCtx)
+		: m_frcCtx(frcCtx)
+	{
+		ANKI_ASSERT(m_frcCtx);
+	}
 
 	/// Thread hive task. ud must point to a VisibilityTestTask. threadId is forwarded to test() as its taskId.
-	static void callback(void* ud, U32 threadId, ThreadHive& hive)
+	static void callback(void* ud, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem)
 	{
 		VisibilityTestTask& self = *static_cast<VisibilityTestTask*>(ud);
-		self.test(hive);
+		self.test(hive, threadId);
 	}
 
 private:
-	void test(ThreadHive& hive);
-	void updateTimestamp(const SceneNode& node);
+	void test(ThreadHive& hive, U32 taskId); // taskId indexes m_frcCtx->m_queueViews. NOTE(review): test() assigns that view's m_timestamp on entry instead of max-combining it, so a later task on the same thread may discard an earlier one's value - confirm
 
 	ANKI_USE_RESULT Bool testAgainstRasterizer(const CollisionShape& cs, const Aabb& aabb) const // Returns true when there is no rasterizer, otherwise the rasterizer's verdict
 	{
-		return (m_r) ? m_r->visibilityTest(cs, aabb) : true;
+		return (m_frcCtx->m_r) ? m_frcCtx->m_r->visibilityTest(cs, aabb) : true;
 	}
 };
+static_assert(std::is_trivially_destructible<VisibilityTestTask>::value == true, "Should be trivially destructible");
 
 /// Task that combines and sorts the results.
 class CombineResultsTask
 {
 public:
-	WeakPtr<VisibilityContext> m_visCtx;
-	WeakPtr<FrustumComponent> m_frc;
-	WeakArray<VisibilityTestTask> m_tests;
+	FrustumVisibilityContext* m_frcCtx = nullptr;
 
-	WeakPtr<RenderQueue> m_results; ///< Where to store the results.
-
-	SoftwareRasterizer* m_swRast = nullptr; ///< For cleanup.
+	CombineResultsTask(FrustumVisibilityContext* frcCtx)
+		: m_frcCtx(frcCtx)
+	{
+		ANKI_ASSERT(m_frcCtx);
+	}
 
 	/// Thread hive task.
-	static void callback(void* ud, U32 threadId, ThreadHive& hive)
+	static void callback(void* ud, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem)
 	{
 		CombineResultsTask& self = *static_cast<CombineResultsTask*>(ud);
 		self.combine();
@@ -261,6 +277,7 @@ private:
 		WeakArray<T>& combined,
 		WeakArray<T*>* ptrCombined);
 };
+static_assert(std::is_trivially_destructible<CombineResultsTask>::value == true, "Should be trivially destructible");
 /// @}
 
 } // end namespace anki

+ 5 - 1
src/anki/scene/components/FrustumComponent.cpp

@@ -4,7 +4,6 @@
 // http://www.anki3d.org/LICENSE
 
 #include <anki/scene/components/FrustumComponent.h>
-#include <anki/scene/Visibility.h>
 
 namespace anki
 {
@@ -22,6 +21,11 @@ FrustumComponent::FrustumComponent(SceneNode* node, Frustum* frustum)
 	setEnabledVisibilityTests(FrustumComponentVisibilityTestFlag::NONE);
 }
 
+FrustumComponent::~FrustumComponent()
+{
+	m_coverageBuff.m_depthMap.destroy(getAllocator()); // Release the coverage buffer storage that fillCoverageBufferCallback() creates with the same allocator
+}
+
 Error FrustumComponent::update(SceneNode& node, Second, Second, Bool& updated)
 {
 	updated = false;

+ 44 - 0
src/anki/scene/components/FrustumComponent.h

@@ -52,6 +52,8 @@ public:
 	/// Pass the frustum here so we can avoid the virtuals
 	FrustumComponent(SceneNode* node, Frustum* frustum);
 
+	~FrustumComponent();
+
 	Frustum& getFrustum()
 	{
 		return *m_frustum;
@@ -140,6 +142,40 @@ public:
 		return m_flags.getAny(FrustumComponentVisibilityTestFlag::ALL_TESTS);
 	}
 
+	/// The type is FillCoverageBufferCallback.
+	static void fillCoverageBufferCallback(void* userData, F32* depthValues, U32 width, U32 height)
+	{
+		ANKI_ASSERT(userData && depthValues && width > 0 && height > 0);
+		FrustumComponent& self = *static_cast<FrustumComponent*>(userData);
+
+		self.m_coverageBuff.m_depthMap.destroy(self.getAllocator());
+		self.m_coverageBuff.m_depthMap.create(self.getAllocator(), width * height);
+		memcpy(&self.m_coverageBuff.m_depthMap[0], depthValues, self.m_coverageBuff.m_depthMap.getSizeInBytes());
+
+		self.m_coverageBuff.m_depthMapWidth = width;
+		self.m_coverageBuff.m_depthMapHeight = height;
+	}
+
+	Bool hasCoverageBuffer() const
+	{
+		return m_coverageBuff.m_depthMap.getSize() > 0;
+	}
+
+	void getCoverageBufferInfo(ConstWeakArray<F32>& depthBuff, U32& width, U32& height) const
+	{
+		if(m_coverageBuff.m_depthMap.getSize() > 0)
+		{
+			depthBuff = ConstWeakArray<F32>(&m_coverageBuff.m_depthMap[0], m_coverageBuff.m_depthMap.getSize());
+			width = m_coverageBuff.m_depthMapWidth;
+			height = m_coverageBuff.m_depthMapHeight;
+		}
+		else
+		{
+			depthBuff = ConstWeakArray<F32>();
+			width = height = 0;
+		}
+	}
+
 private:
 	enum Flags
 	{
@@ -154,6 +190,14 @@ private:
 	Mat4 m_vpm = Mat4::getIdentity(); ///< View projection matrix
 
 	BitMask<U16> m_flags;
+
+	class
+	{
+	public:
+		DynamicArray<F32> m_depthMap;
+		U32 m_depthMapWidth = 0;
+		U32 m_depthMapHeight = 0;
+	} m_coverageBuff; ///< Coverage buffer for extra visibility tests.
 };
 /// @}
 

+ 13 - 4
src/anki/scene/components/SpatialComponent.cpp

@@ -17,25 +17,34 @@ SpatialComponent::SpatialComponent(SceneNode* node, const CollisionShape* shape)
 {
 	ANKI_ASSERT(shape);
 	markForUpdate();
+	m_octreeInfo.m_userData = this;
 }
 
 SpatialComponent::~SpatialComponent()
 {
 	getSceneGraph().getSectorGroup().spatialDeleted(this);
+
+	if(m_placed)
+	{
+		getSceneGraph().getOctree().remove(m_octreeInfo);
+	}
 }
 
 Error SpatialComponent::update(SceneNode&, Second, Second, Bool& updated)
 {
-	m_flags.unset(Flag::VISIBLE_ANY);
-
-	updated = m_flags.get(Flag::MARKED_FOR_UPDATE);
+	updated = m_markedForUpdate;
 	if(updated)
 	{
 		m_shape->computeAabb(m_aabb);
 		getSceneGraph().getSectorGroup().spatialUpdated(this);
-		m_flags.unset(Flag::MARKED_FOR_UPDATE);
+		m_markedForUpdate = false;
+
+		getSceneGraph().getOctree().place(m_aabb, &m_octreeInfo);
+		m_placed = true;
 	}
 
+	m_octreeInfo.reset();
+
 	return Error::NONE;
 }
 

+ 7 - 33
src/anki/scene/components/SpatialComponent.h

@@ -5,8 +5,8 @@
 
 #pragma once
 
-#include <anki/scene/Common.h>
 #include <anki/scene/components/SceneComponent.h>
+#include <anki/scene/Octree.h>
 #include <anki/Collision.h>
 #include <anki/util/BitMask.h>
 #include <anki/util/Enum.h>
@@ -66,13 +66,7 @@ public:
 	/// Check if it's confined in a single sector.
 	Bool getSingleSector() const
 	{
-		return m_flags.get(Flag::SINGLE_SECTOR);
-	}
-
-	/// Confine it or not in a single sector.
-	void setSingleSector(Bool yes)
-	{
-		m_flags.set(Flag::SINGLE_SECTOR, yes);
+		return false;
 	}
 
 	/// Used for sorting spatials. In most object the origin is the center of mass but for cameras the origin is the
@@ -91,19 +85,7 @@ public:
 	/// The derived class has to manually call this method when the collision shape got updated.
 	void markForUpdate()
 	{
-		m_flags.set(Flag::MARKED_FOR_UPDATE);
-	}
-
-	/// Set if visible by a camera
-	void setVisibleByCamera(Bool visible)
-	{
-		m_flags.set(Flag::VISIBLE_CAMERA, visible);
-	}
-
-	/// Check if visible by camera
-	Bool getVisibleByCamera() const
-	{
-		return m_flags.get(Flag::VISIBLE_CAMERA);
+		m_markedForUpdate = true;
 	}
 
 	/// @name SceneComponent overrides
@@ -112,23 +94,15 @@ public:
 	/// @}
 
 private:
-	/// Spatial flags
-	enum class Flag : U8
-	{
-		NONE = 0,
-		VISIBLE_CAMERA = 1 << 1,
-		VISIBLE_LIGHT = 1 << 2,
-		VISIBLE_ANY = VISIBLE_CAMERA | VISIBLE_LIGHT,
-		MARKED_FOR_UPDATE = 1 << 3,
-		SINGLE_SECTOR = 1 << 4
-	};
-	ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(Flag, friend)
+	Bool8 m_markedForUpdate = false;
+	Bool8 m_placed = false;
 
 	const CollisionShape* m_shape;
-	BitMask<Flag> m_flags;
 	Aabb m_aabb; ///< A faster shape
 	Vec4 m_origin = Vec4(MAX_F32, MAX_F32, MAX_F32, 0.0);
 	List<SectorNode*> m_sectorInfo;
+
+	OctreePlaceable m_octreeInfo;
 };
 /// @}
 

+ 7 - 24
src/anki/util/Functions.h

@@ -184,31 +184,14 @@ struct TypesAreTheSame<T, T>
 };
 
 template<typename T>
-void memorySet(T* dest, T value, const PtrSize count);
-
-#define ANKI_SPECIALISE_MEMORY_SET(T) \
-	template<> \
-	inline void memorySet(T* dest, T value, const PtrSize count) \
-	{ \
-		ANKI_ASSERT(dest); \
-		const T* end = dest + count; \
-		while(dest != end) \
-		{ \
-			memcpy(reinterpret_cast<char*>(dest), &value, sizeof(T)); \
-			++dest; \
-		} \
+void memorySet(T* dest, T value, PtrSize count)
+{
+	ANKI_ASSERT(dest);
+	while(count--)
+	{
+		dest[count] = value;
 	}
-
-ANKI_SPECIALISE_MEMORY_SET(U8)
-ANKI_SPECIALISE_MEMORY_SET(I8)
-ANKI_SPECIALISE_MEMORY_SET(U16)
-ANKI_SPECIALISE_MEMORY_SET(I16)
-ANKI_SPECIALISE_MEMORY_SET(U32)
-ANKI_SPECIALISE_MEMORY_SET(I32)
-ANKI_SPECIALISE_MEMORY_SET(U64)
-ANKI_SPECIALISE_MEMORY_SET(I64)
-
-#undef ANKI_SPECIALISE_MEMORY_SET
+}
 
 /// Zero memory of an object
 template<typename T>

+ 41 - 105
src/anki/util/ThreadHive.cpp

@@ -32,7 +32,7 @@ public:
 		, m_hive(hive)
 	{
 		ANKI_ASSERT(hive);
-		m_thread.start(this, threadCallback, (pinToCores) ? m_id : -1);
+		m_thread.start(this, threadCallback, (pinToCores) ? I(m_id) : -1);
 	}
 
 private:
@@ -46,7 +46,7 @@ private:
 	}
 };
 
-class ThreadHive::Task
+class ThreadHive::Task : public NonCopyable
 {
 public:
 	Task* m_next; ///< Next in the list.
@@ -54,21 +54,8 @@ public:
 	ThreadHiveTaskCallback m_cb; ///< Callback that defines the task.
 	void* m_arg; ///< Args for the callback.
 
-	Task** m_deps;
-	U16 m_depCount;
-	Bool8 m_othersDepend; ///< Other tasks depend on this one.
-
-	Task()
-	{
-	}
-
-	Task(const Task& b) = delete;
-	Task& operator=(const Task& b) = delete;
-
-	Bool done() const
-	{
-		return m_cb == nullptr;
-	}
+	ThreadHiveSemaphore* m_waitSemaphore;
+	ThreadHiveSemaphore* m_signalSemaphore;
 };
 
 ThreadHive::ThreadHive(U threadCount, GenericMemoryPoolAllocator<U8> alloc, Bool pinToCores)
@@ -117,100 +104,51 @@ void ThreadHive::submitTasks(ThreadHiveTask* tasks, const U taskCount)
 	// Allocate tasks
 	Task* const htasks = m_alloc.newArray<Task>(taskCount);
 
-	// Allocate the dependency handles
-	U depCount = 0;
-	for(U i = 0; i < taskCount; ++i)
-	{
-		depCount += tasks[i].m_inDependencies.getSize();
-	}
-
-	Task** depHandles;
-	if(depCount)
-	{
-		depHandles = m_alloc.newArray<Task*>(depCount);
-	}
-	else
-	{
-		depHandles = nullptr;
-	}
-
-	depCount = 0;
-
 	// Initialize tasks
+	Task* prevTask = nullptr;
 	for(U i = 0; i < taskCount; ++i)
 	{
 		const ThreadHiveTask& inTask = tasks[i];
 		Task& outTask = htasks[i];
 
+		outTask.m_next = nullptr;
 		outTask.m_cb = inTask.m_callback;
 		outTask.m_arg = inTask.m_argument;
-		outTask.m_depCount = 0;
-		outTask.m_next = nullptr;
-		outTask.m_othersDepend = false;
+		outTask.m_waitSemaphore = inTask.m_waitSemaphore;
+		outTask.m_signalSemaphore = inTask.m_signalSemaphore;
 
-		// Set the dependencies
-		if(inTask.m_inDependencies.getSize() > 0)
+		// Connect tasks
+		if(prevTask)
 		{
-			outTask.m_deps = &depHandles[depCount];
-			depCount += inTask.m_inDependencies.getSize();
-		}
-		else
-		{
-			outTask.m_deps = nullptr;
+			prevTask->m_next = &outTask;
 		}
+		prevTask = &outTask;
 	}
 
 	// Push work
 	{
 		LockGuard<Mutex> lock(m_mtx);
 
-		for(U i = 0; i < taskCount; ++i)
+		if(m_head != nullptr)
 		{
-			const ThreadHiveTask& inTask = tasks[i];
-			Task& outTask = htasks[i];
-
-			for(U j = 0; j < inTask.m_inDependencies.getSize(); ++j)
-			{
-				ThreadHiveDependencyHandle dep = inTask.m_inDependencies[j];
-				ANKI_ASSERT(dep);
-				Task* depTask = static_cast<Task*>(dep);
-
-				if(!depTask->done())
-				{
-					outTask.m_deps[outTask.m_depCount++] = depTask;
-					depTask->m_othersDepend = true;
-				}
-			}
-
-			// Push to the list
-			ANKI_HIVE_DEBUG_PRINT(
-				"pushing back %p (udata %p)\n", static_cast<void*>(&outTask), static_cast<void*>(outTask.m_arg));
-			if(m_head != nullptr)
-			{
-				ANKI_ASSERT(m_tail && m_head);
-				m_tail->m_next = &outTask;
-				m_tail = &outTask;
-			}
-			else
-			{
-				ANKI_ASSERT(m_tail == nullptr);
-				m_head = &outTask;
-				m_tail = m_head;
-			}
+			ANKI_ASSERT(m_tail && m_head);
+			m_tail->m_next = &htasks[0];
+			m_tail = &htasks[taskCount - 1];
+		}
+		else
+		{
+			ANKI_ASSERT(m_tail == nullptr);
+			m_head = &htasks[0];
+			m_tail = &htasks[taskCount - 1];
 		}
 
 		m_pendingTasks += taskCount;
 
 		ANKI_HIVE_DEBUG_PRINT("submit tasks\n");
-		// Notify all threads
-		m_cvar.notifyAll();
 	}
 
-	// Set the out dependencies
-	for(U i = 0; i < taskCount; ++i)
-	{
-		tasks[i].m_outDependency = static_cast<ThreadHiveDependencyHandle>(&htasks[i]);
-	}
+	// Notify all threads
+	m_cvar.notifyAll();
 }
 
 void ThreadHive::threadRun(U threadId)
@@ -223,7 +161,20 @@ void ThreadHive::threadRun(U threadId)
 		ANKI_ASSERT(task && task->m_cb);
 		ANKI_HIVE_DEBUG_PRINT(
 			"tid: %lu will exec %p (udata: %p)\n", threadId, static_cast<void*>(task), static_cast<void*>(task->m_arg));
-		task->m_cb(task->m_arg, threadId, *this);
+		task->m_cb(task->m_arg, threadId, *this, task->m_signalSemaphore);
+
+#if ANKI_EXTRA_CHECKS
+		task->m_cb = nullptr;
+#endif
+
+		// Signal the semaphore as early as possible
+		if(task->m_signalSemaphore)
+		{
+			const U32 out = task->m_signalSemaphore->m_atomic.fetchSub(1);
+			(void)out;
+			ANKI_ASSERT(out > 0u);
+			ANKI_HIVE_DEBUG_PRINT("\tsem is %u\n", out - 1u);
+		}
 	}
 
 	ANKI_HIVE_DEBUG_PRINT("tid: %lu thread quits!\n", threadId);
@@ -238,12 +189,11 @@ Bool ThreadHive::waitForWork(U threadId, Task*& task)
 	// Complete the previous task
 	if(task)
 	{
-		task->m_cb = nullptr;
 		--m_pendingTasks;
 
-		if(task->m_othersDepend || m_pendingTasks == 0)
+		if(task->m_signalSemaphore || m_pendingTasks == 0)
 		{
-			// A dependency got resolved or we are out of tasks. Wake them all
+			// A dependency may have been resolved or we are out of tasks. Wake them all
 			ANKI_HIVE_DEBUG_PRINT("tid: %lu wake all\n", threadId);
 			m_cvar.notifyAll();
 		}
@@ -266,20 +216,8 @@ ThreadHive::Task* ThreadHive::getNewTask()
 	Task* task = m_head;
 	while(task)
 	{
-		ANKI_ASSERT(!task->done());
-
 		// Check if there are dependencies
-		Bool allDepsCompleted = true;
-		for(U j = 0; j < task->m_depCount; ++j)
-		{
-			Task* depTask = task->m_deps[j];
-
-			if(!depTask->done())
-			{
-				allDepsCompleted = false;
-				break;
-			}
-		}
+		const Bool allDepsCompleted = task->m_waitSemaphore == nullptr || task->m_waitSemaphore->m_atomic.load() == 0;
 
 		if(allDepsCompleted)
 		{
@@ -324,8 +262,6 @@ void ThreadHive::waitAllTasks()
 
 	m_head = nullptr;
 	m_tail = nullptr;
-	m_allocatedTasks = 0;
-	m_allocatedDeps = 0;
 	m_alloc.getMemoryPool().reset();
 
 	ANKI_HIVE_DEBUG_PRINT("mt: done waiting all\n");

+ 49 - 8
src/anki/util/ThreadHive.h

@@ -19,11 +19,29 @@ class ThreadHive;
 /// @{
 
 /// Opaque handle that defines a ThreadHive depedency. @memberof ThreadHive
-using ThreadHiveDependencyHandle = void*;
+class ThreadHiveSemaphore
+{
+	friend class ThreadHive;
+
+public:
+	/// Increase the value of the semaphore. It's easy to break things with that.
+	/// @note It's thread-safe.
+	void increaseSemaphore(U32 increase)
+	{
+		m_atomic.fetchAdd(increase);
+	}
+
+private:
+	Atomic<U32> m_atomic;
+
+	// No need to construct it or delete it
+	ThreadHiveSemaphore() = delete;
+	~ThreadHiveSemaphore() = delete;
+};
 
 /// The callback that defines a ThreadHibe task.
 /// @memberof ThreadHive
-using ThreadHiveTaskCallback = void (*)(void*, U32 threadId, ThreadHive& hive);
+using ThreadHiveTaskCallback = void (*)(void*, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* signalSemaphore);
 
 /// Task for the ThreadHive. @memberof ThreadHive
 class ThreadHiveTask
@@ -35,11 +53,12 @@ public:
 	/// Arguments to pass to the m_callback.
 	void* m_argument ANKI_DBG_NULLIFY;
 
-	/// The tasks that this task will depend on.
-	WeakArray<ThreadHiveDependencyHandle> m_inDependencies;
+	/// The task will start when that semaphore reaches zero.
+	ThreadHiveSemaphore* m_waitSemaphore = nullptr;
 
-	/// Will be filled after the submission of the task. Can be used to set dependencies to future tasks.
-	ThreadHiveDependencyHandle m_outDependency;
+	/// When the task is completed that semaphore will be decremented by one. Can be used to set dependencies to future
+	/// tasks.
+	ThreadHiveSemaphore* m_signalSemaphore = nullptr;
 };
 
 /// A scheduler of small tasks. It takes a number of tasks and schedules them in one of the threads. The tasks can
@@ -59,6 +78,30 @@ public:
 		return m_threadCount;
 	}
 
+	/// Create a new semaphore with some initial value.
+	/// @param initialValue  Can't be zero.
+	ThreadHiveSemaphore* newSemaphore(const U32 initialValue)
+	{
+		ANKI_ASSERT(initialValue > 0);
+		PtrSize alignment = alignof(ThreadHiveSemaphore);
+		ThreadHiveSemaphore* sem =
+			reinterpret_cast<ThreadHiveSemaphore*>(m_alloc.allocate(sizeof(ThreadHiveSemaphore), &alignment));
+		sem->m_atomic.set(initialValue);
+		return sem;
+	}
+
+	/// Allocate some scratch memory. The memory becomes invalid after waitAllTasks() is called.
+	void* allocateScratchMemory(PtrSize size, U32 alignment)
+	{
+		ANKI_ASSERT(size > 0 && alignment > 0);
+		PtrSize align = alignment;
+		void* out = m_alloc.allocate(size, &align);
+#if ANKI_ASSERTS_ENABLED
+		memset(out, 0, size);
+#endif
+		return out;
+	}
+
 	/// Submit tasks. The ThreadHiveTaskCallback callbacks can also call this.
 	void submitTasks(ThreadHiveTask* tasks, const U taskCount);
 
@@ -89,8 +132,6 @@ private:
 	Task* m_tail = nullptr; ///< Tail of the task list.
 	Bool m_quit = false;
 	U32 m_pendingTasks = 0;
-	U32 m_allocatedTasks = 0;
-	U32 m_allocatedDeps = 0;
 
 	Mutex m_mtx;
 	ConditionVariable m_cvar;

+ 1 - 1
src/anki/util/ThreadPosix.cpp

@@ -88,7 +88,7 @@ void Thread::start(void* userData, ThreadCallback callback, I pinToCore)
 	I err = pthread_create(thread, &attr, pthreadCallback, this);
 	if(err)
 	{
-		ANKI_UTIL_LOGF("pthread_create() failed");
+		ANKI_UTIL_LOGF("pthread_create() failed: %d", err);
 	}
 	else
 	{

+ 0 - 26
src/anki/util/Tracer.h

@@ -8,7 +8,6 @@
 #include <anki/util/File.h>
 #include <anki/util/List.h>
 #include <anki/util/ObjectAllocator.h>
-#include <anki/util/Singleton.h>
 
 namespace anki
 {
@@ -97,31 +96,6 @@ private:
 
 	static void getSpreadsheetColumnName(U column, Array<char, 3>& arr);
 };
-
-/// Tracer singleton.
-using TracerSingleton = Singleton<Tracer>;
-
-/// Convenience class to trace an event.
-class TraceScopedEvent
-{
-public:
-	TraceScopedEvent(const char* name)
-		: m_name(name)
-		, m_tracer(&TracerSingleton::get())
-	{
-		m_handle = m_tracer->beginEvent();
-	}
-
-	~TraceScopedEvent()
-	{
-		m_tracer->endEvent(m_name, m_handle);
-	}
-
-private:
-	const char* m_name;
-	TracerEventHandle m_handle;
-	Tracer* m_tracer;
-};
 /// @}
 
 } // end namespace anki

+ 1 - 1
tests/gr/StackGpuAllocator.cpp

@@ -75,7 +75,7 @@ public:
 	Atomic<U32> m_allocCount;
 };
 
-static void doAllocation(void* arg, U32 threadId, ThreadHive& hive)
+static void doAllocation(void* arg, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem)
 {
 	TestContext* ctx = static_cast<TestContext*>(arg);
 

+ 5 - 4
tests/scene/Octree.cpp

@@ -35,6 +35,7 @@ ANKI_TEST(Scene, Octree)
 			if(mode == 0)
 			{
 				// Place
+				placeables[i].m_userData = &placeables[i];
 				octree.place(volume, &placeables[i]);
 				placed.push_back(i);
 			}
@@ -54,16 +55,16 @@ ANKI_TEST(Scene, Octree)
 					placeables[idx].reset();
 				}
 
-				DynamicArrayAuto<OctreePlaceable*> arr(alloc);
-				octree.gatherVisible(frustum, 0, arr);
+				DynamicArrayAuto<void*> arr(alloc);
+				octree.gatherVisible(frustum, 0, nullptr, nullptr, arr);
 
 				ANKI_TEST_EXPECT_EQ(arr.getSize(), placed.size());
 				for(U32 idx : placed)
 				{
 					Bool found = false;
-					for(OctreePlaceable* placeable : arr)
+					for(void* placeable : arr)
 					{
-						if(&placeables[idx] == placeable)
+						if(&placeables[idx] == static_cast<OctreePlaceable*>(placeable))
 						{
 							found = true;
 							break;

+ 17 - 13
tests/util/ThreadHive.cpp

@@ -29,13 +29,13 @@ public:
 	};
 };
 
-static void decNumber(void* arg, U32, ThreadHive& hive)
+static void decNumber(void* arg, U32, ThreadHive& hive, ThreadHiveSemaphore* sem)
 {
 	ThreadHiveTestContext* ctx = static_cast<ThreadHiveTestContext*>(arg);
 	ctx->m_countAtomic.fetchSub(2);
 }
 
-static void incNumber(void* arg, U32, ThreadHive& hive)
+static void incNumber(void* arg, U32, ThreadHive& hive, ThreadHiveSemaphore* sem)
 {
 	ThreadHiveTestContext* ctx = static_cast<ThreadHiveTestContext*>(arg);
 	ctx->m_countAtomic.fetchAdd(4);
@@ -43,7 +43,7 @@ static void incNumber(void* arg, U32, ThreadHive& hive)
 	hive.submitTask(decNumber, arg);
 }
 
-static void taskToWaitOn(void* arg, U32, ThreadHive& hive)
+static void taskToWaitOn(void* arg, U32, ThreadHive& hive, ThreadHiveSemaphore* sem)
 {
 	ThreadHiveTestContext* ctx = static_cast<ThreadHiveTestContext*>(arg);
 	HighRezTimer::sleep(1.0);
@@ -51,7 +51,7 @@ static void taskToWaitOn(void* arg, U32, ThreadHive& hive)
 	HighRezTimer::sleep(0.1);
 }
 
-static void taskToWait(void* arg, U32 threadId, ThreadHive& hive)
+static void taskToWait(void* arg, U32 threadId, ThreadHive& hive, ThreadHiveSemaphore* sem)
 {
 	ThreadHiveTestContext* ctx = static_cast<ThreadHiveTestContext*>(arg);
 	U prev = ctx->m_countAtomic.fetchAdd(1);
@@ -90,17 +90,20 @@ ANKI_TEST(Util, ThreadHive)
 		ThreadHiveTask task;
 		task.m_callback = taskToWaitOn;
 		task.m_argument = &ctx;
+		task.m_signalSemaphore = hive.newSemaphore(1);
 
 		hive.submitTasks(&task, 1);
 
 		const U DEP_TASKS = 10;
 		ThreadHiveTask dtasks[DEP_TASKS];
+		ThreadHiveSemaphore* sem = hive.newSemaphore(DEP_TASKS);
 
 		for(U i = 0; i < DEP_TASKS; ++i)
 		{
 			dtasks[i].m_callback = taskToWait;
 			dtasks[i].m_argument = &ctx;
-			dtasks[i].m_inDependencies = WeakArray<ThreadHiveDependencyHandle>(&task.m_outDependency, 1);
+			dtasks[i].m_waitSemaphore = task.m_signalSemaphore;
+			dtasks[i].m_signalSemaphore = sem;
 		}
 
 		hive.submitTasks(&dtasks[0], DEP_TASKS);
@@ -111,7 +114,7 @@ ANKI_TEST(Util, ThreadHive)
 		{
 			dtasks2[i].m_callback = taskToWait;
 			dtasks2[i].m_argument = &ctx;
-			dtasks2[i].m_inDependencies = WeakArray<ThreadHiveDependencyHandle>(&dtasks[i].m_outDependency, 1);
+			dtasks2[i].m_waitSemaphore = sem;
 		}
 
 		hive.submitTasks(&dtasks2[0], DEP_TASKS);
@@ -128,7 +131,7 @@ ANKI_TEST(Util, ThreadHive)
 		ctx.m_count = 0;
 
 		I number = 0;
-		ThreadHiveDependencyHandle dep = 0;
+		ThreadHiveSemaphore* sem = nullptr;
 
 		const U SUBMISSION_COUNT = 100;
 		const U TASK_COUNT = 1000;
@@ -143,21 +146,22 @@ ANKI_TEST(Util, ThreadHive)
 				ThreadHiveTask task;
 				task.m_callback = (cb) ? incNumber : decNumber;
 				task.m_argument = &ctx;
+				task.m_signalSemaphore = hive.newSemaphore(1);
 
-				if((rand() % 3) == 0 && j > 0 && dep)
+				if((rand() % 3) == 0 && j > 0 && sem)
 				{
-					task.m_inDependencies = WeakArray<ThreadHiveDependencyHandle>(&dep, 1);
+					task.m_waitSemaphore = sem;
 				}
 
 				hive.submitTasks(&task, 1);
 
 				if((rand() % 7) == 0)
 				{
-					dep = task.m_outDependency;
+					sem = task.m_signalSemaphore;
 				}
 			}
 
-			dep = 0;
+			sem = nullptr;
 			hive.waitAllTasks();
 		}
 
@@ -199,7 +203,7 @@ public:
 		}
 	}
 
-	static void callback(void* arg, U32, ThreadHive& hive)
+	static void callback(void* arg, U32, ThreadHive& hive, ThreadHiveSemaphore* sem)
 	{
 		static_cast<FibTask*>(arg)->doWork(hive);
 	}
@@ -223,7 +227,7 @@ ANKI_TEST(Util, ThreadHiveBench)
 
 	const U32 threadCount = getCpuCoresCount();
 	HeapAllocator<U8> alloc(allocAligned, nullptr);
-	ThreadHive hive(threadCount, alloc);
+	ThreadHive hive(threadCount, alloc, true);
 
 	StackAllocator<U8> salloc(allocAligned, nullptr, 1024);
 	Atomic<U64> sum = {0};