Browse Source

Change the way drawcall stats are calculated

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
31e7779335

+ 12 - 13
AnKi/Core/App.cpp

@@ -45,11 +45,10 @@ namespace anki {
 android_app* g_androidApp = nullptr;
 #endif
 
-StatCounter g_cpuTotalTime(StatCategory::kTime, "CPU total", StatFlag::kMilisecond | StatFlag::kShowAverage);
-static StatCounter g_cpuAllocatedMem(StatCategory::kCpuMem, "Total", StatFlag::kBytes | StatFlag::kThreadSafe);
-static StatCounter g_cpuAllocationCount(StatCategory::kCpuMem, "Allocations/frame",
-										StatFlag::kBytes | StatFlag::kZeroEveryFrame | StatFlag::kThreadSafe);
-static StatCounter g_cpuFreesCount(StatCategory::kCpuMem, "Frees/frame", StatFlag::kBytes | StatFlag::kZeroEveryFrame | StatFlag::kThreadSafe);
+StatCounter g_cpuTotalTimeStatVar(StatCategory::kTime, "CPU total", StatFlag::kMilisecond | StatFlag::kShowAverage | StatFlag::kMainThreadUpdates);
+static StatCounter g_cpuAllocatedMemStatVar(StatCategory::kCpuMem, "Total", StatFlag::kBytes);
+static StatCounter g_cpuAllocationCountStatVar(StatCategory::kCpuMem, "Allocations/frame", StatFlag::kBytes | StatFlag::kZeroEveryFrame);
+static StatCounter g_cpuFreesCountStatVar(StatCategory::kCpuMem, "Frees/frame", StatFlag::kBytes | StatFlag::kZeroEveryFrame);
 
 NumericCVar<U32> g_windowWidthCVar(CVarSubsystem::kCore, "Width", 1920, 16, 16 * 1024, "Width");
 NumericCVar<U32> g_windowHeightCVar(CVarSubsystem::kCore, "Height", 1080, 16, 16 * 1024, "Height");
@@ -71,9 +70,9 @@ NumericCVar<F32> g_lod1MaxDistanceCVar(CVarSubsystem::kCore, "Lod1MaxDistance",
 									   "Distance that will be used to calculate the LOD 1");
 
 #if ANKI_PLATFORM_MOBILE
-static StatCounter g_maliGpuActive(StatCategory::kGpuMisc, "Mali active cycles");
-static StatCounter g_maliGpuReadBandwidth(StatCategory::kGpuMisc, "Mali read bandwidth");
-static StatCounter g_maliGpuWriteBandwidth(StatCategory::kGpuMisc, "Mali write bandwidth");
+static StatCounter g_maliGpuActiveStatVar(StatCategory::kGpuMisc, "Mali active cycles", StatFlag::kMainThreadUpdates);
+static StatCounter g_maliGpuReadBandwidthStatVar(StatCategory::kGpuMisc, "Mali read bandwidth", StatFlag::kMainThreadUpdates);
+static StatCounter g_maliGpuWriteBandwidthStatVar(StatCategory::kGpuMisc, "Mali write bandwidth", StatFlag::kMainThreadUpdates);
 
 static BoolCVar g_maliHwCountersCVar(CVarSubsystem::kCore, "MaliHwCounters", false, "Enable Mali counters");
 #endif
@@ -111,8 +110,8 @@ void* App::statsAllocCallback(void* userData, void* ptr, PtrSize size, [[maybe_u
 		out = static_cast<void*>(allocation);
 
 		// Update stats
-		g_cpuAllocatedMem.atomicIncrement(size);
-		g_cpuAllocationCount.atomicIncrement(1);
+		g_cpuAllocatedMemStatVar.increment(size);
+		g_cpuAllocationCountStatVar.increment(1);
 	}
 	else
 	{
@@ -125,8 +124,8 @@ void* App::statsAllocCallback(void* userData, void* ptr, PtrSize size, [[maybe_u
 		ANKI_ASSERT(allocation->m_allocatedSize > 0);
 
 		// Update stats
-		g_cpuAllocatedMem.atomicDecrement(allocation->m_allocatedSize);
-		g_cpuFreesCount.atomicIncrement(1);
+		g_cpuAllocatedMemStatVar.decrement(allocation->m_allocatedSize);
+		g_cpuFreesCountStatVar.increment(1);
 
 		// Free
 		self->m_originalAllocCallback(self->m_originalAllocUserData, allocation, 0, 0);
@@ -482,7 +481,7 @@ Error App::mainLoop()
 			// Sleep
 			const Second endTime = HighRezTimer::getCurrentTime();
 			const Second frameTime = endTime - startTime;
-			g_cpuTotalTime.set((frameTime - grTime) * 1000.0);
+			g_cpuTotalTimeStatVar.set((frameTime - grTime) * 1000.0);
 			if(!benchmarkMode) [[likely]]
 			{
 				const Second timerTick = 1.0_sec / Second(g_targetFpsCVar.get());

+ 2 - 2
AnKi/Core/App.h

@@ -23,8 +23,8 @@ extern NumericCVar<U32> g_targetFpsCVar;
 extern NumericCVar<F32> g_lod0MaxDistanceCVar;
 extern NumericCVar<F32> g_lod1MaxDistanceCVar;
 extern NumericCVar<U32> g_displayStatsCVar;
-extern StatCounter g_cpuTotalTime;
-extern StatCounter g_rendererGpuTime;
+extern StatCounter g_cpuTotalTimeStatVar;
+extern StatCounter g_rendererGpuTimeStatVar;
 
 /// The core class of the engine.
 class App

+ 8 - 6
AnKi/Core/GpuMemory/GpuSceneBuffer.cpp

@@ -13,9 +13,11 @@
 
 namespace anki {
 
-static StatCounter g_gpuSceneBufferAllocatedSize(StatCategory::kGpuMem, "GPU scene allocated", StatFlag::kBytes);
-static StatCounter g_gpuSceneBufferTotal(StatCategory::kGpuMem, "GPU scene total", StatFlag::kBytes);
-static StatCounter g_gpuSceneBufferFragmentation(StatCategory::kGpuMem, "GPU scene fragmentation", StatFlag::kFloat);
+static StatCounter g_gpuSceneBufferAllocatedSizeStatVar(StatCategory::kGpuMem, "GPU scene allocated",
+														StatFlag::kBytes | StatFlag::kMainThreadUpdates);
+static StatCounter g_gpuSceneBufferTotalStatVar(StatCategory::kGpuMem, "GPU scene total", StatFlag::kBytes | StatFlag::kMainThreadUpdates);
+static StatCounter g_gpuSceneBufferFragmentationStatVar(StatCategory::kGpuMem, "GPU scene fragmentation",
+														StatFlag::kFloat | StatFlag::kMainThreadUpdates);
 
 static NumericCVar<PtrSize> g_gpuSceneInitialSizeCVar(CVarSubsystem::kCore, "GpuSceneInitialSize", 64_MB, 16_MB, 2_GB,
 													  "Global memory for the GPU scene");
@@ -41,9 +43,9 @@ void GpuSceneBuffer::updateStats() const
 	PtrSize userAllocatedSize, totalSize;
 	m_pool.getStats(externalFragmentation, userAllocatedSize, totalSize);
 
-	g_gpuSceneBufferAllocatedSize.set(userAllocatedSize);
-	g_gpuSceneBufferTotal.set(totalSize);
-	g_gpuSceneBufferFragmentation.set(externalFragmentation);
+	g_gpuSceneBufferAllocatedSizeStatVar.set(userAllocatedSize);
+	g_gpuSceneBufferTotalStatVar.set(totalSize);
+	g_gpuSceneBufferFragmentationStatVar.set(externalFragmentation);
 }
 
 /// It packs the source and destination offsets as well as the size of the patch itself.

+ 2 - 2
AnKi/Core/GpuMemory/RebarTransientMemoryPool.cpp

@@ -12,7 +12,7 @@
 
 namespace anki {
 
-static StatCounter g_rebarUserMemory(StatCategory::kGpuMem, "ReBAR used mem", StatFlag::kBytes);
+static StatCounter g_rebarUserMemoryStatVar(StatCategory::kGpuMem, "ReBAR used mem", StatFlag::kBytes | StatFlag::kMainThreadUpdates);
 
 static NumericCVar<PtrSize> g_rebarGpuMemorySizeCvar(CVarSubsystem::kCore, "RebarGpuMemorySize", 24_MB, 1_MB, 1_GB,
 													 "ReBAR: always mapped GPU memory");
@@ -90,7 +90,7 @@ void RebarTransientMemoryPool::endFrame()
 	}
 
 	ANKI_TRACE_INC_COUNTER(ReBarUsedMemory, usedMemory);
-	g_rebarUserMemory.set(usedMemory);
+	g_rebarUserMemoryStatVar.set(usedMemory);
 }
 
 } // end namespace anki

+ 7 - 6
AnKi/Core/GpuMemory/UnifiedGeometryBuffer.cpp

@@ -10,9 +10,10 @@
 
 namespace anki {
 
-static StatCounter g_unifiedGeomBufferAllocatedSize(StatCategory::kGpuMem, "UGB allocated", StatFlag::kBytes);
-static StatCounter g_unifiedGeomBufferTotal(StatCategory::kGpuMem, "UGB total", StatFlag::kBytes);
-static StatCounter g_unifiedGeomBufferFragmentation(StatCategory::kGpuMem, "UGB fragmentation", StatFlag::kFloat);
+static StatCounter g_unifiedGeomBufferAllocatedSizeStatVar(StatCategory::kGpuMem, "UGB allocated", StatFlag::kBytes | StatFlag::kMainThreadUpdates);
+static StatCounter g_unifiedGeomBufferTotalStatVar(StatCategory::kGpuMem, "UGB total", StatFlag::kBytes | StatFlag::kMainThreadUpdates);
+static StatCounter g_unifiedGeomBufferFragmentationStatVar(StatCategory::kGpuMem, "UGB fragmentation",
+														   StatFlag::kFloat | StatFlag::kMainThreadUpdates);
 
 static NumericCVar<PtrSize> g_unifiedGometryBufferSizeCvar(CVarSubsystem::kCore, "UnifiedGeometryBufferSize", 128_MB, 16_MB, 2_GB,
 														   "Global index and vertex buffer size");
@@ -44,9 +45,9 @@ void UnifiedGeometryBuffer::updateStats() const
 	PtrSize userAllocatedSize, totalSize;
 	m_pool.getStats(externalFragmentation, userAllocatedSize, totalSize);
 
-	g_unifiedGeomBufferAllocatedSize.set(userAllocatedSize);
-	g_unifiedGeomBufferTotal.set(totalSize);
-	g_unifiedGeomBufferFragmentation.set(externalFragmentation);
+	g_unifiedGeomBufferAllocatedSizeStatVar.set(userAllocatedSize);
+	g_unifiedGeomBufferTotalStatVar.set(totalSize);
+	g_unifiedGeomBufferFragmentationStatVar.set(externalFragmentation);
 }
 
 } // end namespace anki

+ 47 - 5
AnKi/Core/StatsSet.cpp

@@ -26,14 +26,51 @@ void StatsSet::endFrame()
 	{
 		StatCounter& counter = *m_statCounterArr[i];
 		const Bool needsReset = !!(counter.m_flags & StatFlag::kZeroEveryFrame);
-		const Bool atomic = !!(counter.m_flags & StatFlag::kThreadSafe);
-		if(needsReset && atomic)
+		const Bool atomic = !(counter.m_flags & StatFlag::kMainThreadUpdates);
+		const Bool isFloat = !!(counter.m_flags & StatFlag::kFloat);
+
+		// Store the previous value
+		if(isFloat)
 		{
-			counter.m_atomic.store(0);
+			if(atomic)
+			{
+				LockGuard lock(counter.m_floatLock);
+				counter.m_prevValuef = counter.m_f;
+
+				if(needsReset)
+				{
+					counter.m_f = 0.0;
+				}
+			}
+			else
+			{
+				counter.m_prevValuef = counter.m_f;
+
+				if(needsReset)
+				{
+					counter.m_f = 0;
+				}
+			}
 		}
-		else if(needsReset)
+		else
 		{
-			counter.m_u = 0;
+			if(atomic && needsReset)
+			{
+				counter.m_prevValueu = counter.m_atomic.exchange(0);
+			}
+			else if(atomic && !needsReset)
+			{
+				counter.m_prevValueu = counter.m_atomic.load();
+			}
+			else if(!atomic && needsReset)
+			{
+				counter.m_prevValueu = counter.m_u;
+				counter.m_u = 0;
+			}
+			else if(!atomic && !needsReset)
+			{
+				counter.m_prevValueu = counter.m_u;
+			}
 		}
 	}
 }
@@ -42,6 +79,11 @@ void StatsSet::registerCounter(StatCounter* counter)
 {
 	ANKI_ASSERT(counter);
 
+	if(m_mainThreadId == kMaxU64)
+	{
+		m_mainThreadId = Thread::getCurrentThreadId();
+	}
+
 	// Try grow the array
 	if(m_statCounterArrSize + 1 > m_statCounterArrStorageSize)
 	{

+ 98 - 148
AnKi/Core/StatsSet.h

@@ -21,7 +21,7 @@ enum class StatFlag : U16
 	kNone = 0,
 
 	kZeroEveryFrame = 1 << 0,
-	kThreadSafe = 1 << 1,
+	kMainThreadUpdates = 1 << 1, ///< Can only be updated from the main thread.
 	kFloat = 1 << 2,
 
 	kShowAverage = 1 << 3,
@@ -54,16 +54,24 @@ class StatCounter
 public:
 	/// Construct.
 	/// @param name Name of the counter. The object will share ownership of the pointer.
-	StatCounter(StatCategory category, const Char* name, StatFlag flags = StatFlag::kNone);
+	StatCounter(StatCategory category, const Char* name, StatFlag flags);
 
 	template<std::integral T>
 	U64 increment(T value)
 	{
 #if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!(m_flags & StatFlag::kThreadSafe));
 		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
-		const U64 orig = m_u;
-		m_u += value;
+		checkThread();
+		U64 orig;
+		if(!!(m_flags & StatFlag::kMainThreadUpdates))
+		{
+			orig = m_u;
+			m_u += value;
+		}
+		else
+		{
+			orig = m_atomic.fetchAdd(value);
+		}
 		return orig;
 #else
 		(void)value;
@@ -75,10 +83,20 @@ public:
 	F64 increment(T value)
 	{
 #if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!(m_flags & StatFlag::kThreadSafe));
 		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
-		const F64 orig = m_f;
-		m_f += value;
+		checkThread(); // Asserts the calling thread when the counter is flagged kMainThreadUpdates
+		F64 orig; // Must not be const: it is assigned in exactly one of the two branches below
+		if(!!(m_flags & StatFlag::kMainThreadUpdates))
+		{
+			orig = m_f;
+			m_f += value;
+		}
+		else
+		{
+			LockGuard lock(m_floatLock); // Counters without kMainThreadUpdates serialize float updates with the lock
+			orig = m_f;
+			m_f += value;
+		}
 		return orig;
 #else
 		(void)value;
@@ -86,58 +104,23 @@ public:
 #endif
 	}
 
-	template<std::integral T>
-	U64 atomicIncrement(T value)
-	{
-#if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
-		return m_atomic.fetchAdd(value);
-#else
-		(void)value;
-		return 0;
-#endif
-	}
-
-	template<std::floating_point T>
-	F64 atomicIncrement(T value)
-	{
-#if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
-		LockGuard lock(m_floatLock);
-		return increment(value);
-#else
-		(void)value;
-		return 0.0;
-#endif
-	}
-
 	template<std::integral T>
 	U64 decrement(T value)
 	{
 #if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!(m_flags & StatFlag::kThreadSafe));
 		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
-		const U64 orig = m_u;
-		ANKI_ASSERT(orig >= value);
-		m_u -= value;
-		return orig;
-#else
-		(void)value;
-		return 0;
-#endif
-	}
-
-	template<std::floating_point T>
-	F64 decrement(T value)
-	{
-#if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
-		const F64 orig = m_f;
+		checkThread(); // Asserts the calling thread when the counter is flagged kMainThreadUpdates
+		U64 orig; // Value of the counter before the subtraction; this is what gets returned
+		if(!!(m_flags & StatFlag::kMainThreadUpdates))
+		{
+			orig = m_u;
+			m_u -= value;
+		}
+		else
+		{
+			orig = m_atomic.fetchSub(value); // Counters without kMainThreadUpdates go through the atomic
+		}
 		ANKI_ASSERT(orig >= value);
-		m_f -= value;
 		return orig;
 #else
 		(void)value;
@@ -145,43 +128,22 @@ public:
 #endif
 	}
 
-	template<std::integral T>
-	U64 atomicDecrement(T value)
-	{
-#if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
-		const U64 out = m_atomic.fetchSub(value);
-		ANKI_ASSERT(out >= value);
-		return out;
-#else
-		(void)value;
-		return 0;
-#endif
-	}
-
-	template<std::floating_point T>
-	F64 atomicDecrement(T value)
-	{
-#if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
-		LockGuard lock(m_floatLock);
-		return decrement(value);
-#else
-		(void)value;
-		return 0;
-#endif
-	}
-
 	template<std::integral T>
 	U64 set(T value)
 	{
 #if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!(m_flags & StatFlag::kThreadSafe));
 		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
-		const U64 orig = m_u;
-		m_u = value;
+		checkThread();
+		U64 orig;
+		if(!!(m_flags & StatFlag::kMainThreadUpdates))
+		{
+			orig = m_u;
+			m_u = value;
+		}
+		else
+		{
+			orig = m_atomic.exchange(value);
+		}
 		return orig;
 #else
 		(void)value;
@@ -193,10 +155,20 @@ public:
 	F64 set(T value)
 	{
 #if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!(m_flags & StatFlag::kThreadSafe));
 		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
-		const F64 orig = m_f;
-		m_f = value;
+		checkThread(); // Asserts the calling thread when the counter is flagged kMainThreadUpdates
+		F64 orig; // Previous value of the counter; this is what gets returned
+		if(!!(m_flags & StatFlag::kMainThreadUpdates))
+		{
+			orig = m_f;
+			m_f = value;
+		}
+		else
+		{
+			LockGuard lock(m_floatLock); // Counters without kMainThreadUpdates serialize float updates with the lock
+			orig = m_f;
+			m_f = value;
+		}
 		return orig;
 #else
 		(void)value;
@@ -204,40 +176,13 @@ public:
 #endif
 	}
 
-	template<std::integral T>
-	U64 atomicSet(T value)
-	{
-#if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
-		return m_atomic.exchange(value);
-#else
-		(void)value;
-		return 0;
-#endif
-	}
-
-	template<std::floating_point T>
-	F64 atomicSet(T value)
-	{
-#if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
-		LockGuard lock(m_floatLock);
-		return set(value);
-#else
-		(void)value;
-		return 0;
-#endif
-	}
-
 	template<std::integral T>
 	U64 getValue() const
 	{
 #if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
-		return m_u;
+		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
+		checkThread();
+		return !!(m_flags & StatFlag::kMainThreadUpdates) ? m_u : m_atomic.load();
 #else
 		return 0;
 #endif
@@ -247,9 +192,17 @@ public:
 	F64 getValue() const
 	{
 #if ANKI_STATS_ENABLED
-		ANKI_ASSERT(!!(m_flags & StatFlag::kThreadSafe));
-		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
-		return m_f;
+		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
+		checkThread();
+		if(!!(m_flags & StatFlag::kMainThreadUpdates))
+		{
+			return m_f;
+		}
+		else
+		{
+			LockGuard lock(m_floatLock);
+			return m_f;
+		}
 #else
 		return -1.0;
 #endif
@@ -264,6 +217,12 @@ private:
 		F64 m_f;
 	};
 
+	union
+	{
+		U64 m_prevValueu = 0;
+		F64 m_prevValuef;
+	};
+
 	const Char* m_name = nullptr;
 
 	mutable SpinLock m_floatLock;
@@ -271,19 +230,7 @@ private:
 	StatFlag m_flags = StatFlag::kNone;
 	StatCategory m_category = StatCategory::kCount;
 
-	template<std::floating_point T>
-	F64& get()
-	{
-		ANKI_ASSERT(!!(m_flags & StatFlag::kFloat));
-		return m_f;
-	}
-
-	template<std::integral T>
-	U64& get()
-	{
-		ANKI_ASSERT(!(m_flags & StatFlag::kFloat));
-		return m_u;
-	}
+	void checkThread() const;
 #endif
 };
 
@@ -296,6 +243,7 @@ class StatsSet : public MakeSingletonSimple<StatsSet>
 	friend class MakeSingletonSimple;
 
 public:
+	/// @note Not thread-safe.
 	template<typename TFuncUint, typename TFuncFloat>
 	void iterateStats(TFuncUint funcUint, TFuncFloat funcFloat)
 	{
@@ -305,22 +253,11 @@ public:
 			const StatCounter& counter = *m_statCounterArr[i];
 			if(!!(counter.m_flags & StatFlag::kFloat))
 			{
-				F64 value;
-				if(!!(counter.m_flags & StatFlag::kThreadSafe))
-				{
-					LockGuard lock(counter.m_floatLock);
-					value = counter.m_f;
-				}
-				else
-				{
-					value = counter.m_f;
-				}
-				funcFloat(counter.m_category, counter.m_name, value, counter.m_flags);
+				funcFloat(counter.m_category, counter.m_name, counter.m_prevValuef, counter.m_flags);
 			}
 			else
 			{
-				const U64 value = !!(counter.m_flags & StatFlag::kThreadSafe) ? counter.m_atomic.load() : counter.m_u;
-				funcUint(counter.m_category, counter.m_name, value, counter.m_flags);
+				funcUint(counter.m_category, counter.m_name, counter.m_prevValueu, counter.m_flags);
 			}
 		}
 #else
@@ -329,6 +266,7 @@ public:
 #endif
 	}
 
+	/// @note Not thread-safe.
 	void endFrame()
 #if ANKI_STATS_ENABLED
 		;
@@ -337,6 +275,7 @@ public:
 	}
 #endif
 
+	/// @note Thread-safe.
 	U32 getCounterCount() const
 	{
 #if ANKI_STATS_ENABLED
@@ -351,6 +290,7 @@ private:
 	StatCounter** m_statCounterArr = nullptr;
 	U32 m_statCounterArrSize = 0;
 	U32 m_statCounterArrStorageSize = 0;
+	U64 m_mainThreadId = kMaxU64;
 #endif
 
 	StatsSet() = default;
@@ -379,6 +319,16 @@ inline StatCounter::StatCounter(StatCategory category, const Char* name, StatFla
 	(void)flags;
 }
 #endif
+
+#if ANKI_STATS_ENABLED
+inline void StatCounter::checkThread() const // Debug-only validation of the calling thread
+{
+	if(!!(m_flags & StatFlag::kMainThreadUpdates)) // Counters without this flag are not thread-checked
+	{
+		ANKI_ASSERT(StatsSet::getSingleton().m_mainThreadId == Thread::getCurrentThreadId() && "Counter can only be updated from the main thread");
+	}
+}
+#endif
 /// @}
 
 } // end namespace anki

+ 3 - 3
AnKi/Gr/Vulkan/CommandBufferFactory.cpp

@@ -9,7 +9,7 @@
 
 namespace anki {
 
-static StatCounter g_commandBufferCountStat(StatCategory::kMisc, "CommandBufferCount", StatFlag::kThreadSafe);
+static StatCounter g_commandBufferCountStatVar(StatCategory::kMisc, "CommandBufferCount", StatFlag::kNone);
 
 static VulkanQueueType getQueueTypeFromCommandBufferFlags(CommandBufferFlag flags, const VulkanQueueFamilies& queueFamilies)
 {
@@ -40,7 +40,7 @@ MicroCommandBuffer::~MicroCommandBuffer()
 		vkFreeCommandBuffers(getVkDevice(), m_threadAlloc->m_pools[m_queue], 1, &m_handle);
 		m_handle = {};
 
-		g_commandBufferCountStat.atomicDecrement(1_U64);
+		g_commandBufferCountStatVar.decrement(1_U64);
 	}
 }
 
@@ -124,7 +124,7 @@ Error CommandBufferThreadAllocator::newCommandBuffer(CommandBufferFlag cmdbFlags
 		ci.commandBufferCount = 1;
 
 		ANKI_TRACE_INC_COUNTER(VkCommandBufferCreate, 1);
-		g_commandBufferCountStat.atomicIncrement(1_U64);
+		g_commandBufferCountStatVar.increment(1_U64);
 		VkCommandBuffer cmdb;
 		ANKI_VK_CHECK(vkAllocateCommandBuffers(getVkDevice(), &ci, &cmdb));
 

+ 1 - 1
AnKi/Gr/Vulkan/CommandBufferImpl.h

@@ -331,7 +331,7 @@ public:
 		const BufferImpl& countBufferImpl = static_cast<const BufferImpl&>(*countBuffer);
 		ANKI_ASSERT(countBufferImpl.usageValid(BufferUsageBit::kIndirectDraw));
 		ANKI_ASSERT((countBufferOffset % 4) == 0);
-		ANKI_ASSERT(countBufferOffset + maxDrawCount * sizeof(U32) <= countBuffer->getSize());
+		ANKI_ASSERT(countBufferOffset + sizeof(U32) <= countBuffer->getSize());
 
 		ANKI_ASSERT(maxDrawCount > 0 && maxDrawCount <= getGrManagerImpl().getDeviceCapabilities().m_maxDrawIndirectCount);
 

+ 2 - 2
AnKi/Gr/Vulkan/GpuMemoryManager.cpp

@@ -11,10 +11,10 @@ namespace anki {
 
 static StatCounter g_deviceMemoryAllocatedStatVar(StatCategory::kGpuMem, "Device mem", StatFlag::kBytes);
 static StatCounter g_deviceMemoryInUseStatVar(StatCategory::kGpuMem, "Device mem in use", StatFlag::kBytes);
-static StatCounter g_deviceMemoryAllocationCountStatVar(StatCategory::kGpuMem, "Device mem allocations");
+static StatCounter g_deviceMemoryAllocationCountStatVar(StatCategory::kGpuMem, "Device mem allocations", StatFlag::kNone);
 static StatCounter g_hostMemoryAllocatedStatVar(StatCategory::kGpuMem, "Host mem", StatFlag::kBytes);
 static StatCounter g_hostMemoryInUseStatVar(StatCategory::kGpuMem, "Host mem in use", StatFlag::kBytes);
-static StatCounter g_hostMemoryAllocationCountStatVar(StatCategory::kGpuMem, "Host mem allocations");
+static StatCounter g_hostMemoryAllocationCountStatVar(StatCategory::kGpuMem, "Host mem allocations", StatFlag::kNone);
 
 static constexpr Array<GpuMemoryManagerClassInfo, 7> kClasses{
 	{{4_KB, 256_KB}, {128_KB, 8_MB}, {1_MB, 64_MB}, {16_MB, 128_MB}, {64_MB, 128_MB}, {128_MB, 128_MB}, {256_MB, 256_MB}}};

+ 5 - 2
AnKi/Renderer/FinalComposite.cpp

@@ -165,8 +165,11 @@ void FinalComposite::run(RenderPassWorkContext& rgraphCtx)
 			rgraphCtx.bindColorTexture(0, 8, getRenderer().getDbg().getRt());
 		}
 
-		const UVec4 pc(0, 0, floatBitsToUint(g_filmGrainStrengthCVar.get()), getRenderer().getFrameCount() & kMaxU32);
-		cmdb.setPushConstants(&pc, sizeof(pc));
+		if(g_filmGrainStrengthCVar.get() > 0.0f)
+		{
+			const UVec4 pc(0, 0, floatBitsToUint(g_filmGrainStrengthCVar.get()), getRenderer().getFrameCount() & kMaxU32);
+			cmdb.setPushConstants(&pc, sizeof(pc));
+		}
 	}
 	else
 	{

+ 2 - 2
AnKi/Renderer/IndirectDiffuseProbes.cpp

@@ -23,8 +23,8 @@ static NumericCVar<U32> g_indirectDiffuseProbeTileResolutionCVar(CVarSubsystem::
 static NumericCVar<U32> g_indirectDiffuseProbeShadowMapResolutionCVar(CVarSubsystem::kRenderer, "IndirectDiffuseProbeShadowMapResolution", 128, 4,
 																	  2048, "GI shadowmap resolution");
 
-static StatCounter g_giProbeRenderCountStatVar(StatCategory::kRenderer, "GI probes rendered");
-static StatCounter g_giProbeCellsRenderCountStatVar(StatCategory::kRenderer, "GI probes cells rendered");
+static StatCounter g_giProbeRenderCountStatVar(StatCategory::kRenderer, "GI probes rendered", StatFlag::kMainThreadUpdates);
+static StatCounter g_giProbeCellsRenderCountStatVar(StatCategory::kRenderer, "GI probes cells rendered", StatFlag::kMainThreadUpdates);
 
 static Vec3 computeCellCenter(U32 cellIdx, const GlobalIlluminationProbeComponent& probe)
 {

+ 5 - 4
AnKi/Renderer/MainRenderer.cpp

@@ -19,8 +19,9 @@
 
 namespace anki {
 
-static StatCounter g_rendererCpuTime(StatCategory::kTime, "Renderer", StatFlag::kMilisecond | StatFlag::kShowAverage);
-StatCounter g_rendererGpuTime(StatCategory::kTime, "GPU frame", StatFlag::kMilisecond | StatFlag::kShowAverage);
+static StatCounter g_rendererCpuTimeStatVar(StatCategory::kTime, "Renderer",
+											StatFlag::kMilisecond | StatFlag::kShowAverage | StatFlag::kMainThreadUpdates);
+StatCounter g_rendererGpuTimeStatVar(StatCategory::kTime, "GPU frame", StatFlag::kMilisecond | StatFlag::kShowAverage | StatFlag::kMainThreadUpdates);
 
 MainRenderer::MainRenderer()
 {
@@ -161,11 +162,11 @@ Error MainRenderer::render(Texture* presentTex)
 	// Stats
 	if(ANKI_STATS_ENABLED || ANKI_TRACING_ENABLED)
 	{
-		g_rendererCpuTime.set((HighRezTimer::getCurrentTime() - startTime) * 1000.0);
+		g_rendererCpuTimeStatVar.set((HighRezTimer::getCurrentTime() - startTime) * 1000.0);
 
 		RenderGraphStatistics rgraphStats;
 		m_rgraph->getStatistics(rgraphStats);
-		g_rendererGpuTime.set(rgraphStats.m_gpuTime * 1000.0);
+		g_rendererGpuTimeStatVar.set(rgraphStats.m_gpuTime * 1000.0);
 
 		if(rgraphStats.m_gpuTime > 0.0)
 		{

+ 1 - 1
AnKi/Renderer/ProbeReflections.cpp

@@ -25,7 +25,7 @@ static NumericCVar<U32> g_probeReflectionIrradianceResolutionCVar(CVarSubsystem:
 																  "Reflection probe irradiance resolution");
 static NumericCVar<U32> g_probeReflectionShadowMapResolutionCVar(CVarSubsystem::kRenderer, "ProbeReflectionShadowMapResolution", 64, 4, 2048,
 																 "Reflection probe shadow resolution");
-static StatCounter g_probeReflectionCountStatVar(StatCategory::kRenderer, "Reflection probes rendered");
+static StatCounter g_probeReflectionCountStatVar(StatCategory::kRenderer, "Reflection probes rendered", StatFlag::kMainThreadUpdates);
 
 Error ProbeReflections::init()
 {

+ 1 - 0
AnKi/Renderer/Renderer.cpp

@@ -306,6 +306,7 @@ Error Renderer::initInternal(UVec2 swapchainResolution)
 	ANKI_CHECK(m_nonRenderablesVisibility.init());
 	ANKI_CHECK(m_asVisibility.init());
 	ANKI_CHECK(m_hzbGenerator.init());
+	ANKI_CHECK(m_sceneDrawer.init());
 
 	return Error::kNone;
 }

+ 1 - 1
AnKi/Renderer/ShadowMapping.cpp

@@ -23,7 +23,7 @@ static NumericCVar<U32> g_shadowMappingTileCountPerRowOrColumnCVar(CVarSubsystem
 NumericCVar<U32> g_shadowMappingPcfCVar(CVarSubsystem::kRenderer, "ShadowMappingPcf", (ANKI_PLATFORM_MOBILE) ? 0 : 1, 0, 1,
 										"Shadow PCF (CVarSubsystem::kRenderer, 0: off, 1: on)");
 
-static StatCounter g_tilesAllocatedStatVar(StatCategory::kRenderer, "Shadow tiles (re)allocated");
+static StatCounter g_tilesAllocatedStatVar(StatCategory::kRenderer, "Shadow tiles (re)allocated", StatFlag::kMainThreadUpdates);
 
 class LightHash
 {

+ 90 - 0
AnKi/Renderer/Utils/Drawer.cpp

@@ -13,14 +13,41 @@
 #include <AnKi/Core/GpuMemory/UnifiedGeometryBuffer.h>
 #include <AnKi/Core/GpuMemory/RebarTransientMemoryPool.h>
 #include <AnKi/Core/GpuMemory/GpuSceneBuffer.h>
+#include <AnKi/Core/StatsSet.h>
 #include <AnKi/Scene/RenderStateBucket.h>
 
 namespace anki {
 
+static StatCounter g_executedDrawcallsStatVar(StatCategory::kRenderer, "Visible objects", StatFlag::kZeroEveryFrame); // Set in drawMdi() from read-back data
+static StatCounter g_maxDrawcallsStatVar(StatCategory::kRenderer, "Objects tested for visibility", StatFlag::kZeroEveryFrame); // Incremented by every drawMdi()
+
 RenderableDrawer::~RenderableDrawer()
 {
 }
 
+Error RenderableDrawer::init() // Loads the DrawerStats programs; does nothing else when stats are disabled
+{
+#if ANKI_STATS_ENABLED
+	constexpr Array<MutatorValue, 3> kColorAttachmentCounts = {0, 1, 4}; // One program pair per COLOR_ATTACHMENT_COUNT value
+
+	U32 count = 0; // Slot in the two program arrays that the current attachment count fills
+	for(MutatorValue attachmentCount : kColorAttachmentCounts)
+	{
+		ANKI_CHECK(loadShaderProgram("ShaderBinaries/DrawerStats.ankiprogbin",
+									 Array<SubMutation, 2>{{{"CLEAR_COUNTER_BUFFER", 0}, {"COLOR_ATTACHMENT_COUNT", attachmentCount}}},
+									 m_stats.m_statsProg, m_stats.m_updateStatsGrProgs[count])); // CLEAR_COUNTER_BUFFER=0 variant
+
+		ANKI_CHECK(loadShaderProgram("ShaderBinaries/DrawerStats.ankiprogbin",
+									 Array<SubMutation, 2>{{{"CLEAR_COUNTER_BUFFER", 1}, {"COLOR_ATTACHMENT_COUNT", attachmentCount}}},
+									 m_stats.m_statsProg, m_stats.m_resetCounterGrProgs[count])); // CLEAR_COUNTER_BUFFER=1 variant
+
+		++count;
+	}
+#endif
+
+	return Error::kNone;
+}
+
 void RenderableDrawer::setState(const RenderableDrawerArguments& args, CommandBuffer& cmdb)
 {
 	// Allocate, set and bind global uniforms
@@ -55,6 +82,50 @@ void RenderableDrawer::setState(const RenderableDrawerArguments& args, CommandBu
 
 void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuffer& cmdb)
 {
+#if ANKI_STATS_ENABLED
+	U32 variant = 0; // Index into the m_stats program arrays. Initialized so the default case cannot leave it indeterminate when asserts are off
+	switch(args.m_renderingTechinuqe)
+	{
+	case RenderingTechnique::kGBuffer:
+		variant = 2;
+		break;
+	case RenderingTechnique::kForward:
+		variant = 1;
+		break;
+	case RenderingTechnique::kDepth:
+		variant = 0;
+		break;
+	default:
+		ANKI_ASSERT(0);
+	}
+
+	{
+		LockGuard lock(m_stats.m_mtx); // Only one caller per frame may perform the readback and the counter reset
+
+		if(m_stats.m_frameIdx != getRenderer().getFrameCount())
+		{
+			m_stats.m_frameIdx = getRenderer().getFrameCount();
+
+			// Get previous stats
+			U32 prevFrameCount;
+			PtrSize dataRead;
+			getRenderer().getReadbackManager().readMostRecentData(m_stats.m_readback, &prevFrameCount, sizeof(prevFrameCount), dataRead);
+			if(dataRead > 0) [[likely]]
+			{
+				g_executedDrawcallsStatVar.set(prevFrameCount);
+			}
+
+			// Get place to write new stats
+			getRenderer().getReadbackManager().allocateData(m_stats.m_readback, sizeof(U32), m_stats.m_statsBuffer, m_stats.m_statsBufferOffset);
+
+			// First drawcall clears the counter buffer for this frame
+			cmdb.bindShaderProgram(m_stats.m_resetCounterGrProgs[variant].get());
+			cmdb.bindStorageBuffer(0, 0, m_stats.m_statsBuffer, m_stats.m_statsBufferOffset, sizeof(U32));
+			cmdb.draw(PrimitiveTopology::kTriangles, 6);
+		}
+	}
+#endif
+
 	setState(args, cmdb);
 
 	cmdb.bindVertexBuffer(0, args.m_instanceRateRenderablesBuffer.m_buffer, args.m_instanceRateRenderablesBuffer.m_offset,
@@ -95,6 +166,25 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 	});
 
 	ANKI_ASSERT(bucketCount == RenderStateBucketContainer::getSingleton().getBucketCount(args.m_renderingTechinuqe));
+
+	// Update the stats
+#if ANKI_STATS_ENABLED
+	{
+		LockGuard lock(m_stats.m_mtx); // m_statsBuffer and m_statsBufferOffset are written under the same lock above
+
+		U32* counter;
+		BufferOffsetRange threadCountBuff = RebarTransientMemoryPool::getSingleton().allocateFrame(sizeof(U32), counter);
+		*counter = 0;
+
+		cmdb.bindShaderProgram(m_stats.m_updateStatsGrProgs[variant].get());
+		cmdb.bindStorageBuffer(0, 0, m_stats.m_statsBuffer, m_stats.m_statsBufferOffset, sizeof(U32));
+		cmdb.bindStorageBuffer(0, 1, threadCountBuff);
+		cmdb.bindStorageBuffer(0, 2, args.m_mdiDrawCountsBuffer);
+		cmdb.draw(PrimitiveTopology::kTriangles, 6);
+	}
+
+	g_maxDrawcallsStatVar.increment(allUserCount);
+#endif
 }
 
 } // end namespace anki

+ 24 - 10
AnKi/Renderer/Utils/Drawer.h

@@ -6,16 +6,11 @@
 #pragma once
 
 #include <AnKi/Renderer/Common.h>
-#include <AnKi/Resource/RenderingKey.h>
 #include <AnKi/Renderer/Utils/GpuVisibility.h>
 #include <AnKi/Gr.h>
 
 namespace anki {
 
-// Forward
-class Renderer;
-class RenderableQueueElement;
-
 /// @addtogroup renderer
 /// @{
 
@@ -29,7 +24,7 @@ public:
 	Mat4 m_viewProjectionMatrix;
 	Mat4 m_previousViewProjectionMatrix;
 
-	Sampler* m_sampler;
+	Sampler* m_sampler = nullptr;
 
 	// For MDI
 	RenderingTechnique m_renderingTechinuqe = RenderingTechnique::kCount;
@@ -46,20 +41,39 @@ public:
 	}
 };
 
-/// It uses the render queue to batch and render.
-class RenderableDrawer
+/// It uses visibility data to issue drawcalls.
+class RenderableDrawer : public RendererObject
 {
-	friend class RenderTask;
-
 public:
 	RenderableDrawer() = default;
 
 	~RenderableDrawer();
 
+	Error init();
+
 	/// Draw using multidraw indirect.
+	/// @note It's thread-safe.
 	void drawMdi(const RenderableDrawerArguments& args, CommandBuffer& cmdb);
 
 private:
+#if ANKI_STATS_ENABLED
+	class
+	{
+	public:
+		MultiframeReadbackToken m_readback; ///< Token handed to the readback manager by drawMdi().
+
+		ShaderProgramResourcePtr m_statsProg; ///< Resource filled by loadShaderProgram() in init().
+		Array<ShaderProgramPtr, 3> m_updateStatsGrProgs; ///< Bound by drawMdi() after the draws have been recorded.
+		Array<ShaderProgramPtr, 3> m_resetCounterGrProgs; ///< Bound by the first drawMdi() of a frame to clear the counter.
+
+		U64 m_frameIdx = kMaxU64; ///< Renderer frame count at the time of the last counter reset.
+		SpinLock m_mtx; ///< Taken by drawMdi() around every access to the members of this class.
+
+		Buffer* m_statsBuffer = nullptr; ///< Buffer returned by the readback manager's allocateData().
+		PtrSize m_statsBufferOffset = 0; ///< Offset of the counter inside m_statsBuffer.
+	} m_stats;
+#endif
+
 	void setState(const RenderableDrawerArguments& args, CommandBuffer& cmdb);
 };
 /// @}

+ 7 - 56
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -16,36 +16,24 @@
 
 namespace anki {
 
-static StatCounter g_visibleObjects(StatCategory::kRenderer, "Visible objects", StatFlag::kZeroEveryFrame);
-static StatCounter g_testedObjects(StatCategory::kRenderer, "Visbility tested objects", StatFlag::kZeroEveryFrame);
-
 Error GpuVisibility::init()
 {
 	for(MutatorValue hzb = 0; hzb < 2; ++hzb)
 	{
 		for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
 		{
-			ANKI_CHECK(loadShaderProgram(
-				"ShaderBinaries/GpuVisibility.ankiprogbin",
-				Array<SubMutation, 4>{{{"HZB_TEST", hzb}, {"STATS", ANKI_STATS_ENABLED}, {"DISTANCE_TEST", 0}, {"GATHER_AABBS", gatherAabbs}}},
-				m_prog, m_frustumGrProgs[hzb][gatherAabbs]));
+			ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
+										 Array<SubMutation, 3>{{{"HZB_TEST", hzb}, {"DISTANCE_TEST", 0}, {"GATHER_AABBS", gatherAabbs}}}, m_prog,
+										 m_frustumGrProgs[hzb][gatherAabbs])); // DISTANCE_TEST=0 variants; the STATS mutator is no longer passed
 		}
 	}
 
 	for(MutatorValue gatherAabbs = 0; gatherAabbs < 2; ++gatherAabbs)
 	{
-		ANKI_CHECK(loadShaderProgram(
-			"ShaderBinaries/GpuVisibility.ankiprogbin",
-			Array<SubMutation, 4>{{{"HZB_TEST", 0}, {"STATS", ANKI_STATS_ENABLED}, {"DISTANCE_TEST", 1}, {"GATHER_AABBS", gatherAabbs}}}, m_prog,
-			m_distGrProgs[gatherAabbs]));
-	}
-
-#if ANKI_STATS_ENABLED
-	for(GpuReadbackMemoryAllocation& alloc : m_readbackMemory)
-	{
-		alloc = GpuReadbackMemoryPool::getSingleton().allocate(sizeof(U32));
+		ANKI_CHECK(loadShaderProgram("ShaderBinaries/GpuVisibility.ankiprogbin",
+									 Array<SubMutation, 3>{{{"HZB_TEST", 0}, {"DISTANCE_TEST", 1}, {"GATHER_AABBS", gatherAabbs}}}, m_prog,
+									 m_distGrProgs[gatherAabbs])); // DISTANCE_TEST=1 variants, always loaded with HZB_TEST=0
 	}
-#endif
 
 	return Error::kNone;
 }
@@ -125,33 +113,6 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 
 	const U32 bucketCount = RenderStateBucketContainer::getSingleton().getBucketCount(in.m_technique);
 
-#if ANKI_STATS_ENABLED
-	Bool firstCallInTheFrame = false;
-	if(m_lastFrameIdx != getRenderer().getFrameCount())
-	{
-		firstCallInTheFrame = true;
-		m_lastFrameIdx = getRenderer().getFrameCount();
-	}
-
-	const GpuReadbackMemoryAllocation& readAlloc = m_readbackMemory[(m_lastFrameIdx + 1) % m_readbackMemory.getSize()];
-	const GpuReadbackMemoryAllocation& writeAlloc = m_readbackMemory[m_lastFrameIdx % m_readbackMemory.getSize()];
-
-	Buffer* clearStatsBuffer = &readAlloc.getBuffer();
-	const PtrSize clearStatsBufferOffset = readAlloc.getOffset();
-	Buffer* writeStatsBuffer = &writeAlloc.getBuffer();
-	const PtrSize writeStatsBufferOffset = writeAlloc.getOffset();
-
-	if(firstCallInTheFrame)
-	{
-		U32 visibleCount;
-		memcpy(&visibleCount, readAlloc.getMappedMemory(), sizeof(visibleCount));
-
-		g_visibleObjects.set(visibleCount);
-	}
-
-	g_testedObjects.increment(aabbCount);
-#endif
-
 	// Allocate memory for the indirect commands
 	const GpuVisibleTransientMemoryAllocation indirectArgs =
 		GpuVisibleTransientMemoryPool::getSingleton().allocate(aabbCount * sizeof(DrawIndexedIndirectArgs));
@@ -192,12 +153,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 
 	pass.setWork([this, frustumTestData, distTestData, lodReferencePoint = in.m_lodReferencePoint, lodDistances = in.m_lodDistances,
 				  technique = in.m_technique, mdiDrawCountsHandle = out.m_mdiDrawCountsHandle, instanceRateRenderables, indirectArgs, aabbCount,
-				  visibleAabbsBuffer = out.m_visibleAaabbIndicesBuffer
-#if ANKI_STATS_ENABLED
-				  ,
-				  clearStatsBuffer, clearStatsBufferOffset, writeStatsBuffer, writeStatsBufferOffset
-#endif
-	](RenderPassWorkContext& rpass) {
+				  visibleAabbsBuffer = out.m_visibleAaabbIndicesBuffer](RenderPassWorkContext& rpass) {
 		CommandBuffer& cmdb = *rpass.m_commandBuffer;
 
 		const Bool gatherAabbIndices = visibleAabbsBuffer.m_buffer != nullptr;
@@ -287,11 +243,6 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 			cmdb.setPushConstants(&unis, sizeof(unis));
 		}
 
-#if ANKI_STATS_ENABLED
-		cmdb.bindStorageBuffer(0, 10, writeStatsBuffer, writeStatsBufferOffset, sizeof(U32));
-		cmdb.bindStorageBuffer(0, 11, clearStatsBuffer, clearStatsBufferOffset, sizeof(U32));
-#endif
-
 		if(gatherAabbIndices)
 		{
 			cmdb.bindStorageBuffer(0, 12, visibleAabbsBuffer);

+ 0 - 5
AnKi/Renderer/Utils/GpuVisibility.h

@@ -81,11 +81,6 @@ private:
 	Array2d<ShaderProgramPtr, 2, 2> m_frustumGrProgs;
 	Array<ShaderProgramPtr, 2> m_distGrProgs;
 
-#if ANKI_STATS_ENABLED
-	Array<GpuReadbackMemoryAllocation, kMaxFramesInFlight> m_readbackMemory;
-	U64 m_lastFrameIdx = kMaxU64;
-#endif
-
 	void populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out);
 };
 

+ 1 - 1
AnKi/Renderer/Utils/Readback.cpp

@@ -70,7 +70,7 @@ void ReadbackManager::readMostRecentData(const MultiframeReadbackToken& token, v
 	const GpuReadbackMemoryAllocation& allocation = token.m_allocations[slot];
 	dataOut = min(dataSize, PtrSize(allocation.getAllocatedSize()));
 
-	memcpy(data, static_cast<const U8*>(allocation.getMappedMemory()) + allocation.getOffset(), dataOut);
+	memcpy(data, static_cast<const U8*>(allocation.getMappedMemory()), dataOut);
 }
 
 void ReadbackManager::endFrame(Fence* fence)

+ 5 - 3
AnKi/Renderer/Utils/Readback.h

@@ -28,9 +28,7 @@ private:
 class ReadbackManager
 {
 public:
-	void allocateData(MultiframeReadbackToken& token, PtrSize size, Buffer*& buffer, PtrSize& bufferOffset) const;
-
-	/// Read the most up to date data from the GPU.
+	/// Read the most up to date data from the GPU. 1st thing to call in a frame.
 	void readMostRecentData(const MultiframeReadbackToken& token, void* data, PtrSize dataSize, PtrSize& dataOut) const;
 
 	/// Read the most up to date data from the GPU.
@@ -51,6 +49,10 @@ public:
 		}
 	}
 
+	/// Allocate new data for the following frame. 2nd thing to call in a frame.
+	void allocateData(MultiframeReadbackToken& token, PtrSize size, Buffer*& buffer, PtrSize& bufferOffset) const;
+
+	/// Last thing to call in a frame.
 	void endFrame(Fence* fence);
 
 private:

+ 3 - 3
AnKi/Resource/AsyncLoader.cpp

@@ -10,7 +10,7 @@
 
 namespace anki {
 
-static StatCounter g_asyncTasksInFlightStatVar(StatCategory::kMisc, "Async loader tasks", StatFlag::kThreadSafe);
+static StatCounter g_asyncTasksInFlightStatVar(StatCategory::kMisc, "Async loader tasks", StatFlag::kNone);
 
 AsyncLoader::AsyncLoader()
 	: m_thread("AsyncLoad")
@@ -94,7 +94,7 @@ Error AsyncLoader::threadWorker()
 			{
 				ANKI_TRACE_SCOPED_EVENT(RsrcAsyncTask);
 				err = (*task)(ctx);
-				g_asyncTasksInFlightStatVar.atomicDecrement(1);
+				g_asyncTasksInFlightStatVar.decrement(1);
 			}
 
 			if(!err)
@@ -128,7 +128,7 @@ void AsyncLoader::submitTask(AsyncLoaderTask* task)
 	ANKI_ASSERT(task);
 
 	m_tasksInFlightCount.fetchAdd(1);
-	g_asyncTasksInFlightStatVar.atomicIncrement(1);
+	g_asyncTasksInFlightStatVar.increment(1);
 
 	LockGuard<Mutex> lock(m_mtx);
 	m_taskQueue.pushBack(task);

+ 6 - 4
AnKi/Scene/SceneGraph.cpp

@@ -38,8 +38,10 @@
 
 namespace anki {
 
-static StatCounter g_sceneUpdateTime(StatCategory::kTime, "All scene update", StatFlag::kMilisecond | StatFlag::kShowAverage);
-static StatCounter g_scenePhysicsTime(StatCategory::kTime, "Physics", StatFlag::kMilisecond | StatFlag::kShowAverage);
+static StatCounter g_sceneUpdateTimeStatVar(StatCategory::kTime, "All scene update",
+											StatFlag::kMilisecond | StatFlag::kShowAverage | StatFlag::kMainThreadUpdates);
+static StatCounter g_scenePhysicsTimeStatVar(StatCategory::kTime, "Physics",
+											 StatFlag::kMilisecond | StatFlag::kShowAverage | StatFlag::kMainThreadUpdates);
 
 static NumericCVar<U32> g_octreeMaxDepthCVar(CVarSubsystem::kScene, "OctreeMaxDepth", 5, 2, 10, "The max depth of the octree");
 
@@ -241,7 +243,7 @@ Error SceneGraph::update(Second prevUpdateTime, Second crntTime)
 
 		PhysicsWorld::getSingleton().update(crntTime - prevUpdateTime);
 
-		g_scenePhysicsTime.set((HighRezTimer::getCurrentTime() - physicsUpdate) * 1000.0);
+		g_scenePhysicsTimeStatVar.set((HighRezTimer::getCurrentTime() - physicsUpdate) * 1000.0);
 	}
 
 	{
@@ -275,7 +277,7 @@ Error SceneGraph::update(Second prevUpdateTime, Second crntTime)
 #define ANKI_CAT_TYPE(arrayName, gpuSceneType, id, cvarName) GpuSceneArrays::arrayName::getSingleton().flush();
 #include <AnKi/Scene/GpuSceneArrays.def.h>
 
-	g_sceneUpdateTime.set((HighRezTimer::getCurrentTime() - startUpdateTime) * 1000.0);
+	g_sceneUpdateTimeStatVar.set((HighRezTimer::getCurrentTime() - startUpdateTime) * 1000.0);
 	return Error::kNone;
 }
 

+ 2 - 2
AnKi/Scene/StatsUiNode.cpp

@@ -153,9 +153,9 @@ void StatsUiNode::draw(CanvasPtr& canvas)
 		}
 		else
 		{
-			const Second maxTime = max(g_cpuTotalTime.getValue<F64>(), g_rendererGpuTime.getValue<F64>()) / 1000.0;
+			const Second maxTime = max(g_cpuTotalTimeStatVar.getValue<F64>(), g_rendererGpuTimeStatVar.getValue<F64>()) / 1000.0;
 			const F32 fps = F32(1.0 / maxTime);
-			const Bool cpuBound = g_cpuTotalTime.getValue<F64>() > g_rendererGpuTime.getValue<F64>();
+			const Bool cpuBound = g_cpuTotalTimeStatVar.getValue<F64>() > g_rendererGpuTimeStatVar.getValue<F64>();
 			ImGui::TextColored((cpuBound) ? Vec4(1.0f, 0.5f, 0.5f, 1.0f) : Vec4(0.5f, 1.0f, 0.5f, 1.0f), "FPS %.1f", fps);
 		}
 	}

+ 0 - 22
AnKi/Shaders/GpuVisibility.ankiprog

@@ -4,7 +4,6 @@
 // http://www.anki3d.org/LICENSE
 
 #pragma anki mutator HZB_TEST 0 1
-#pragma anki mutator STATS 0 1
 #pragma anki mutator DISTANCE_TEST 0 1
 #pragma anki mutator GATHER_AABBS 0 1
 
@@ -52,11 +51,6 @@ struct DrawIndirectArgsWithPadding
 [[vk::binding(9)]] SamplerState g_nearestAnyClampSampler;
 #endif
 
-#if STATS
-[[vk::binding(10)]] RWStructuredBuffer<U32> g_testsPassed;
-[[vk::binding(11)]] RWStructuredBuffer<U32> g_testsPassedClear; ///< Some previous value. Will be cleared this frame
-#endif
-
 #if GATHER_AABBS
 [[vk::binding(12)]] RWStructuredBuffer<U32> g_visibleAabbIndices; ///< Indices of the visible AABBs. The 1st element is the count.
 #endif
@@ -72,13 +66,6 @@ struct DrawIndirectArgsWithPadding
 		return;
 	}
 
-#if STATS
-	if(svDispatchThreadId.x == 0)
-	{
-		g_testsPassedClear[0] = 0;
-	}
-#endif
-
 	const GpuSceneRenderableAabb aabb = g_aabbs[aabbIdx];
 
 #if DISTANCE_TEST == 0
@@ -240,15 +227,6 @@ struct DrawIndirectArgsWithPadding
 	InterlockedAdd(g_visibleAabbIndices[0], 1, index);
 	g_visibleAabbIndices[index + 1] = aabbIdx;
 #endif
-
-	// Now update the stats
-#if STATS
-	const U32 activeLanes = WaveActiveCountBits(true);
-	if(WaveIsFirstLane())
-	{
-		InterlockedAdd(g_testsPassed[0], activeLanes);
-	}
-#endif
 }
 
 #pragma anki end

+ 1 - 0
AnKi/Shaders/Intellisense.hlsl

@@ -21,6 +21,7 @@
 #define out
 #define in
 #define inout
+#define discard return
 
 #define ANKI_BEGIN_NAMESPACE
 #define ANKI_END_NAMESPACE