Browse Source

Merge pull request #34 from godlikepanos/trace

Improve the tracing
Panagiotis Christopoulos Charitos 7 years ago
parent
commit
2750c96ef4
43 changed files with 1255 additions and 779 deletions
  1. 1 17
      programs/DownscaleBlur.ankiprog
  2. 19 0
      programs/DownscaleBlurCompute.ankiprog
  3. 55 0
      shaders/DownscaleBlur.glsl
  4. 1 0
      src/anki/Util.h
  5. 27 11
      src/anki/core/App.cpp
  6. 1 1
      src/anki/core/CMakeLists.txt
  7. 0 278
      src/anki/core/Trace.cpp
  8. 5 173
      src/anki/core/Trace.h
  9. 1 3
      src/anki/gr/gl/CommandBufferImpl.cpp
  10. 7 8
      src/anki/gr/gl/RenderingThread.cpp
  11. 4 3
      src/anki/gr/vulkan/BufferImpl.cpp
  12. 1 3
      src/anki/gr/vulkan/DescriptorSet.cpp
  13. 15 11
      src/anki/gr/vulkan/GrManagerImpl.cpp
  14. 5 3
      src/anki/gr/vulkan/Pipeline.cpp
  15. 2 4
      src/anki/gr/vulkan/TextureImpl.cpp
  16. 109 36
      src/anki/renderer/DownscaleBlur.cpp
  17. 3 0
      src/anki/renderer/DownscaleBlur.h
  18. 1 1
      src/anki/renderer/Drawer.cpp
  19. 2 2
      src/anki/renderer/GBuffer.cpp
  20. 5 5
      src/anki/renderer/Indirect.cpp
  21. 132 126
      src/anki/renderer/LightBin.cpp
  22. 16 11
      src/anki/renderer/LightBin.h
  23. 1 1
      src/anki/renderer/Renderer.cpp
  24. 9 0
      src/anki/renderer/RendererObject.h
  25. 4 4
      src/anki/renderer/ShadowMapping.cpp
  26. 1 4
      src/anki/renderer/Ssao.cpp
  27. 2 1
      src/anki/renderer/TemporalAA.cpp
  28. 1 1
      src/anki/resource/AsyncLoader.cpp
  29. 1 1
      src/anki/resource/MeshLoader.cpp
  30. 5 5
      src/anki/resource/ResourceFilesystem.cpp
  31. 1 1
      src/anki/resource/TransferGpuAllocator.cpp
  32. 30 28
      src/anki/scene/SceneGraph.cpp
  33. 1 1
      src/anki/scene/SceneGraph.h
  34. 1 2
      src/anki/scene/SoftwareRasterizer.cpp
  35. 1 2
      src/anki/scene/VisibilityInternal.h
  36. 6 0
      src/anki/util/Allocator.h
  37. 1 1
      src/anki/util/CMakeLists.txt
  38. 1 1
      src/anki/util/ObjectAllocator.h
  39. 1 0
      src/anki/util/StdTypes.h
  40. 26 30
      src/anki/util/String.h
  41. 571 0
      src/anki/util/Tracer.cpp
  42. 127 0
      src/anki/util/Tracer.h
  43. 52 0
      tests/util/Tracer.cpp

+ 1 - 17
programs/DownscaleBlur.ankiprog

@@ -14,23 +14,7 @@ http://www.anki3d.org/LICENSE
 
 
 		<shader type="frag">
 		<shader type="frag">
 			<source><![CDATA[
 			<source><![CDATA[
-#include "shaders/Common.glsl"
-
-layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_tex;
-
-layout(location = 0) in vec2 in_uv;
-layout(location = 0) out vec3 out_color;
-
-void main()
-{
-	out_color = textureLod(u_tex, in_uv, 0.0).rgb;
-	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(+1, +1)).rgb;
-	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(-1, -1)).rgb;
-	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(+1, -1)).rgb;
-	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(-1, +1)).rgb;
-
-	out_color /= 5.0;
-}
+#include "shaders/DownscaleBlur.glsl"
 			]]></source>
 			]]></source>
 		</shader>
 		</shader>
 	</shaders>
 	</shaders>

+ 19 - 0
programs/DownscaleBlurCompute.ankiprog

@@ -0,0 +1,19 @@
+<!-- 
+Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+All rights reserved.
+Code licensed under the BSD License.
+http://www.anki3d.org/LICENSE
+-->
+<shaderProgram>
+	<shaders>
+		<shader type="comp">
+			<inputs>
+				<input name="WORKGROUP_SIZE" type="uvec2" const="1"/>
+			</inputs>
+
+			<source><![CDATA[
+#include "shaders/DownscaleBlur.glsl"
+			]]></source>
+		</shader>
+	</shaders>
+</shaderProgram>

+ 55 - 0
shaders/DownscaleBlur.glsl

@@ -0,0 +1,55 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#ifndef ANKI_SHADERS_DOWNSCALE_BLUR_GLSL
+#define ANKI_SHADERS_DOWNSCALE_BLUR_GLSL
+
+#include "shaders/Common.glsl"
+
+layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_tex;
+
+#if defined(ANKI_COMPUTE_SHADER)
+layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
+
+// Push constants hold the size of the output image
+struct PushConsts
+{
+	uvec4 outImageSizePad2;
+};
+ANKI_PUSH_CONSTANTS(PushConsts, u_regs);
+#	define u_fbSize (u_regs.outImageSizePad2.xy)
+
+vec2 in_uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) / vec2(u_fbSize);
+layout(ANKI_IMAGE_BINDING(0, 0)) writeonly uniform image2D out_img;
+vec3 out_color;
+#else
+layout(location = 0) in vec2 in_uv;
+layout(location = 0) out vec3 out_color;
+#endif
+
+void main()
+{
+#if defined(ANKI_COMPUTE_SHADER)
+	if(gl_GlobalInvocationID.x >= u_fbSize.x || gl_GlobalInvocationID.y >= u_fbSize.y)
+	{
+		// Skip pixels outside the viewport
+		return;
+	}
+#endif
+
+	out_color = textureLod(u_tex, in_uv, 0.0).rgb;
+	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(+1, +1)).rgb;
+	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(-1, -1)).rgb;
+	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(+1, -1)).rgb;
+	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(-1, +1)).rgb;
+
+	out_color *= (1.0 / 5.0);
+
+#if defined(ANKI_COMPUTE_SHADER)
+	imageStore(out_img, ivec2(gl_GlobalInvocationID.xy), vec4(out_color, 0.0));
+#endif
+}
+
+#endif

+ 1 - 0
src/anki/Util.h

@@ -36,6 +36,7 @@
 #include <anki/util/INotify.h>
 #include <anki/util/INotify.h>
 #include <anki/util/SparseArray.h>
 #include <anki/util/SparseArray.h>
 #include <anki/util/ObjectAllocator.h>
 #include <anki/util/ObjectAllocator.h>
+#include <anki/util/Tracer.h>
 
 
 /// @defgroup util Utilities (like STL)
 /// @defgroup util Utilities (like STL)
 
 

+ 27 - 11
src/anki/core/App.cpp

@@ -336,12 +336,22 @@ void App::cleanup()
 		m_window = nullptr;
 		m_window = nullptr;
 	}
 	}
 
 
-	m_settingsDir.destroy(m_heapAlloc);
-	m_cacheDir.destroy(m_heapAlloc);
-
 #if ANKI_ENABLE_TRACE
 #if ANKI_ENABLE_TRACE
-	TraceManagerSingleton::destroy();
+	if(TracerSingleton::get().isInitialized())
+	{
+		StringAuto fname(m_heapAlloc);
+		fname.sprintf("%s/trace", m_settingsDir.cstr());
+		ANKI_CORE_LOGI("Will dump trace files: %s", fname.cstr());
+		if(TracerSingleton::get().flush(fname.toCString()))
+		{
+			ANKI_CORE_LOGE("Ignoring error from the tracer");
+		}
+		TracerSingleton::destroy();
+	}
 #endif
 #endif
+
+	m_settingsDir.destroy(m_heapAlloc);
+	m_cacheDir.destroy(m_heapAlloc);
 }
 }
 
 
 Error App::init(const ConfigSet& config, AllocAlignedCallback allocCb, void* allocCbUserData)
 Error App::init(const ConfigSet& config, AllocAlignedCallback allocCb, void* allocCbUserData)
@@ -364,6 +374,11 @@ Error App::initInternal(const ConfigSet& config_, AllocAlignedCallback allocCb,
 	initMemoryCallbacks(allocCb, allocCbUserData);
 	initMemoryCallbacks(allocCb, allocCbUserData);
 	m_heapAlloc = HeapAllocator<U8>(m_allocCb, m_allocCbData);
 	m_heapAlloc = HeapAllocator<U8>(m_allocCb, m_allocCbData);
 
 
+#if ANKI_ENABLE_TRACE
+	TracerSingleton::get().init(m_heapAlloc);
+	TracerSingleton::get().newFrame(0);
+#endif
+
 	ANKI_CHECK(initDirs(config));
 	ANKI_CHECK(initDirs(config));
 
 
 	// Print a message
 	// Print a message
@@ -408,10 +423,6 @@ Error App::initInternal(const ConfigSet& config_, AllocAlignedCallback allocCb,
 	}
 	}
 #endif
 #endif
 
 
-#if ANKI_ENABLE_TRACE
-	ANKI_CHECK(TraceManagerSingleton::get().create(m_heapAlloc, m_settingsDir.toCString()));
-#endif
-
 	ANKI_CORE_LOGI("Number of main threads: %u", U(config.getNumber("core.mainThreadCount")));
 	ANKI_CORE_LOGI("Number of main threads: %u", U(config.getNumber("core.mainThreadCount")));
 
 
 	//
 	//
@@ -533,6 +544,7 @@ Error App::initInternal(const ConfigSet& config_, AllocAlignedCallback allocCb,
 	m_script->setSceneGraph(m_scene);
 	m_script->setSceneGraph(m_scene);
 
 
 	ANKI_CORE_LOGI("Application initialized");
 	ANKI_CORE_LOGI("Application initialized");
+
 	return Error::NONE;
 	return Error::NONE;
 }
 }
 
 
@@ -606,7 +618,11 @@ Error App::mainLoop()
 
 
 	while(!quit)
 	while(!quit)
 	{
 	{
-		ANKI_TRACE_START_FRAME();
+#if ANKI_ENABLE_TRACE
+		static U64 frame = 1;
+		TracerSingleton::get().newFrame(frame++);
+#endif
+		ANKI_TRACE_START_EVENT(FRAME);
 		const Second startTime = HighRezTimer::getCurrentTime();
 		const Second startTime = HighRezTimer::getCurrentTime();
 
 
 		prevUpdateTime = crntTime;
 		prevUpdateTime = crntTime;
@@ -646,6 +662,8 @@ Error App::mainLoop()
 		// Now resume the loader
 		// Now resume the loader
 		m_resources->getAsyncLoader().resume();
 		m_resources->getAsyncLoader().resume();
 
 
+		ANKI_TRACE_STOP_EVENT(FRAME);
+
 		// Sleep
 		// Sleep
 		const Second endTime = HighRezTimer::getCurrentTime();
 		const Second endTime = HighRezTimer::getCurrentTime();
 		const Second frameTime = endTime - startTime;
 		const Second frameTime = endTime - startTime;
@@ -675,8 +693,6 @@ Error App::mainLoop()
 		}
 		}
 
 
 		++m_globalTimestamp;
 		++m_globalTimestamp;
-
-		ANKI_TRACE_STOP_FRAME();
 	}
 	}
 
 
 	return Error::NONE;
 	return Error::NONE;

+ 1 - 1
src/anki/core/CMakeLists.txt

@@ -1,4 +1,4 @@
-set(SOURCES App.cpp Config.cpp Trace.cpp StagingGpuMemoryManager.cpp)
+set(SOURCES App.cpp Config.cpp StagingGpuMemoryManager.cpp)
 
 
 if(SDL)
 if(SDL)
 	set(SOURCES ${SOURCES} NativeWindowSdl.cpp)
 	set(SOURCES ${SOURCES} NativeWindowSdl.cpp)

+ 0 - 278
src/anki/core/Trace.cpp

@@ -1,278 +0,0 @@
-// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
-// All rights reserved.
-// Code licensed under the BSD License.
-// http://www.anki3d.org/LICENSE
-
-#include <anki/core/Trace.h>
-#include <anki/util/HighRezTimer.h>
-#include <cstdlib>
-
-#if ANKI_ENABLE_TRACE
-
-namespace anki
-{
-
-static Array<const char*, U(TraceEventType::COUNT)> eventNames = {{"RESOURCE_ALLOCATE_TRANSFER",
-	"RESOURCE_ASYNC_TASK",
-	"RESOURCE_FILE_READ",
-	"SCENE_UPDATE",
-	"SCENE_DELETE_STUFF",
-	"SCENE_PHYSICS_UPDATE",
-	"SCENE_NODES_UPDATE",
-	"SCENE_VISIBILITY_TESTS",
-	"VIS_TEST",
-	"VIS_COMBINE_RESULTS",
-	"VIS_ITERATE_SECTORS",
-	"VIS_GATHER_TRIANGLES",
-	"VIS_RASTERIZE",
-	"VIS_RASTERIZER_TEST",
-	"RENDERER_INIT",
-	"RENDER",
-	"RENDER_MS",
-	"RENDER_IS",
-	"RENDER_SM",
-	"RENDER_IR",
-	"RENDER_DRAWER",
-	"RENDERER_COMMAND_BUFFER_BUILDING",
-	"RENDERER_LIGHT_BINNING",
-	"GR_RENDER_GRAPH",
-	"GR_COMMAND_BUFFER_RESET",
-	"GR_SHADER_COMPILE",
-	"GL_THREAD",
-	"GL_2ND_LEVEL_CMD_BUFFER",
-	"GL_BIND_RESOURCES",
-	"GL_BIND_PPLINE",
-	"GL_CMD_BUFFER_DESTROY",
-	"VK_ACQUIRE_IMAGE",
-	"VK_QUEUE_SUBMIT",
-	"VK_PIPELINE_CREATE",
-	"VK_BIND_OBJECT",
-	"VK_DESCRIPTOR_SET_GET_OR_CREATE",
-	"SWAP_BUFFERS",
-	"BARRIER_WAIT",
-	"LUA_EXEC",
-	"TIMER_TICK_SLEEP"}};
-
-static Array<const char*, U(TraceCounterType::COUNT)> counterNames = {{"GR_DRAWCALLS",
-	"GR_VERTICES",
-	"GL_PROGS_SKIPPED",
-	"VK_PIPELINE_BARRIERS",
-	"VK_CMD_BUFFER_CREATE",
-	"VK_FENCE_CREATE",
-	"VK_SEMAPHORE_CREATE",
-	"VK_DESCRIPTOR_POOL_CREATE",
-	"VK_DESCRIPTOR_SET_CREATE",
-	"VK_PIPELINE_CREATE",
-	"RENDERER_LIGHTS",
-	"RENDERER_SHADOW_PASSES",
-	"RENDERER_MERGED_DRAWCALLS",
-	"RENDERER_REFLECTIONS",
-	"RESOURCE_ASYNC_TASKS",
-	"SCENE_NODES_UPDATED",
-	"STAGING_UNIFORMS_SIZE",
-	"STAGING_STORAGE_SIZE"}};
-
-#	define ANKI_TRACE_FILE_ERROR() \
-		if(err) \
-		{ \
-			ANKI_CORE_LOGE("Error writing the trace file"); \
-		}
-
-const U MAX_EVENTS_DEPTH = 20;
-thread_local Second g_traceEventStartTime[MAX_EVENTS_DEPTH];
-thread_local I g_traceEventsInFlight = 0;
-
-TraceManager::~TraceManager()
-{
-	// No need to close the json (no need to add ']'). Chrome will take care of that
-}
-
-Error TraceManager::create(HeapAllocator<U8> alloc, const CString& cacheDir)
-{
-	if(getenv("ANKI_DISABLE_TRACE") && CString(getenv("ANKI_DISABLE_TRACE")) == "1")
-	{
-		m_disabled = true;
-		return Error::NONE;
-	}
-
-	memset(&m_perFrameCounters[0], 0, sizeof(m_perFrameCounters));
-	memset(&m_perRunCounters[0], 0, sizeof(m_perRunCounters));
-
-	// Create trace file
-	StringAuto fname(alloc);
-	fname.sprintf("%s/trace.json", &cacheDir[0]);
-
-	ANKI_CHECK(m_traceFile.open(fname.toCString(), FileOpenFlag::WRITE));
-	ANKI_CHECK(m_traceFile.writeText("["));
-
-	// Create per frame file
-	StringAuto perFrameFname(alloc);
-	perFrameFname.sprintf("%s/per_frame.csv", &cacheDir[0]);
-	ANKI_CHECK(m_perFrameFile.open(perFrameFname.toCString(), FileOpenFlag::WRITE));
-
-	ANKI_CHECK(m_perFrameFile.writeText("FPS, "));
-	for(U i = 0; i < U(TraceCounterType::COUNT); ++i)
-	{
-		ANKI_CHECK(m_perFrameFile.writeText("%s, ", counterNames[i]));
-	}
-
-	for(U i = 0; i < U(TraceEventType::COUNT); ++i)
-	{
-		const char* fmt = (i < U(TraceEventType::COUNT) - 1) ? "%s, " : "%s\n";
-		ANKI_CHECK(m_perFrameFile.writeText(fmt, eventNames[i]));
-	}
-
-	return Error::NONE;
-}
-
-void TraceManager::startEvent()
-{
-	if(ANKI_UNLIKELY(m_disabled))
-	{
-		return;
-	}
-
-	I i = ++g_traceEventsInFlight;
-	--i;
-	ANKI_ASSERT(i >= 0 && i <= I(MAX_EVENTS_DEPTH));
-
-	g_traceEventStartTime[i] = HighRezTimer::getCurrentTime();
-}
-
-void TraceManager::stopEvent(TraceEventType type)
-{
-	if(ANKI_UNLIKELY(m_disabled))
-	{
-		return;
-	}
-
-	ANKI_ASSERT(g_traceEventsInFlight > 0 && g_traceEventsInFlight < I(MAX_EVENTS_DEPTH));
-	I i = --g_traceEventsInFlight;
-	ANKI_ASSERT(i >= 0 && i < I(MAX_EVENTS_DEPTH));
-	auto startedTime = g_traceEventStartTime[i];
-
-	U id = m_count.fetchAdd(1);
-	if(id < BUFFERED_ENTRIES)
-	{
-		auto now = HighRezTimer::getCurrentTime();
-		auto dur = now - startedTime;
-
-		m_entries[id] = Entry{type, startedTime, dur, Thread::getCurrentThreadId()};
-
-		m_perFrameCounters[U(TraceCounterType::COUNT) + U(type)].fetchAdd(U64(dur * 1000000000.0));
-	}
-	else
-	{
-		ANKI_CORE_LOGW("Increase the buffered trace entries");
-		m_perFrameCounters[U(TraceCounterType::COUNT) + U(type)].fetchAdd(0);
-	}
-}
-
-Error TraceManager::flushCounters()
-{
-	if(ANKI_UNLIKELY(m_disabled))
-	{
-		return Error::NONE;
-	}
-
-	// Write the FPS counter
-	Second now = HighRezTimer::getCurrentTime();
-	Second time = now - m_startFrameTime;
-	F32 fps = 1.0 / time;
-	ANKI_CHECK(m_traceFile.writeText("{\"name\": \"FPS\", \"cat\": \"PERF\", \"ph\": \"C\", "
-									 "\"pid\": 1, \"ts\": %llu, \"args\": {\"val\": %f}},\n",
-		U64(m_startFrameTime * 1000000.0),
-		fps));
-
-	ANKI_CHECK(m_perFrameFile.writeText("%f, ", fps));
-
-	for(U i = 0; i < U(TraceCounterType::COUNT); ++i)
-	{
-		auto count = m_perFrameCounters[i].exchange(0);
-
-		ANKI_CHECK(m_traceFile.writeText("{\"name\": \"%s\", \"cat\": \"PERF\", \"ph\": \"C\", "
-										 "\"pid\": 1, \"ts\": %llu, \"args\": {\"val\": %llu}},\n",
-			counterNames[i],
-			U64(m_startFrameTime * 1000000.0),
-			count));
-
-		ANKI_CHECK(m_perFrameFile.writeText("%llu, ", count));
-	}
-
-	return Error::NONE;
-}
-
-Error TraceManager::flushEvents()
-{
-	if(ANKI_UNLIKELY(m_disabled))
-	{
-		return Error::NONE;
-	}
-
-	// Write the events
-	U count = m_count.exchange(0);
-	count = min<U>(count, BUFFERED_ENTRIES);
-
-	for(U i = 0; i < count; ++i)
-	{
-		const Entry& e = m_entries[i];
-
-		U64 startMicroSec = U64(e.m_timestamp * 1000000.0);
-		U64 durMicroSec = U64(e.m_duration * 1000000.0);
-
-		if(durMicroSec == 0)
-		{
-			continue;
-		}
-
-		ANKI_CHECK(m_traceFile.writeText("{\"name\": \"%s\", \"cat\": \"PERF\", \"ph\": \"X\", "
-										 "\"pid\": 1, \"tid\": %llu, \"ts\": %llu, \"dur\": %llu},\n",
-			eventNames[e.m_event],
-			e.m_tid,
-			startMicroSec,
-			durMicroSec));
-	}
-
-	for(U i = 0; i < U(TraceEventType::COUNT); ++i)
-	{
-		const char* fmt = (i < U(TraceEventType::COUNT) - 1) ? "%f, " : "%f\n";
-		U64 ns = m_perFrameCounters[i + U(TraceCounterType::COUNT)].exchange(0);
-		ANKI_CHECK(m_perFrameFile.writeText(fmt, F64(ns) / 1000000.0)); // Time in ms
-	}
-
-	return Error::NONE;
-}
-
-void TraceManager::startFrame()
-{
-	if(ANKI_UNLIKELY(m_disabled))
-	{
-		return;
-	}
-
-	m_startFrameTime = HighRezTimer::getCurrentTime();
-}
-
-void TraceManager::stopFrame()
-{
-	if(ANKI_UNLIKELY(m_disabled))
-	{
-		return;
-	}
-
-	Error err = flushCounters();
-
-	if(!err)
-	{
-		err = flushEvents();
-	}
-
-	if(err)
-	{
-		ANKI_CORE_LOGE("Error writing the trace file");
-	}
-}
-
-} // end namespace anki
-
-#endif

+ 5 - 173
src/anki/core/Trace.h

@@ -6,187 +6,19 @@
 #pragma once
 #pragma once
 
 
 #include <anki/core/Common.h>
 #include <anki/core/Common.h>
-#include <anki/util/StdTypes.h>
-#include <anki/util/Singleton.h>
-#include <anki/util/Array.h>
-#include <anki/util/Thread.h>
-#include <anki/util/Atomic.h>
-#include <anki/util/Logger.h>
-#include <anki/util/File.h>
-
-namespace anki
-{
-
-/// @addtogroup core
-/// @{
-
-/// Trace event type.
-enum class TraceEventType
-{
-	RESOURCE_ALLOCATE_TRANSFER,
-	RESOURCE_ASYNC_TASK,
-	RESOURCE_FILE_READ,
-	SCENE_UPDATE,
-	SCENE_DELETE_STUFF,
-	SCENE_PHYSICS_UPDATE,
-	SCENE_NODES_UPDATE,
-	SCENE_VISIBILITY_TESTS,
-	SCENE_VISIBILITY_TEST,
-	SCENE_VISIBILITY_COMBINE_RESULTS,
-	SCENE_VISIBILITY_ITERATE_SECTORS,
-	SCENE_VISIBILITY_GATHER_TRIANGLES,
-	SCENE_VISIBILITY_RASTERIZE,
-	SCENE_RASTERIZER_TEST,
-	RENDERER_INIT,
-	RENDER,
-	RENDER_MS,
-	RENDER_IS,
-	RENDER_SM,
-	RENDER_IR,
-	RENDER_DRAWER,
-	RENDERER_COMMAND_BUFFER_BUILDING,
-	RENDERER_LIGHT_BINNING,
-	GR_RENDER_GRAPH,
-	GR_COMMAND_BUFFER_RESET,
-	GR_SHADER_COMPILE,
-	GL_THREAD,
-	GL_2ND_LEVEL_CMD_BUFFER,
-	GL_BIND_RESOURCES,
-	GL_BIND_PPLINE,
-	GL_CMD_BUFFER_DESTROY,
-	VK_ACQUIRE_IMAGE,
-	VK_QUEUE_SUBMIT,
-	VK_PIPELINE_CREATE,
-	VK_BIND_OBJECT,
-	VK_DESCRIPTOR_SET_GET_OR_CREATE,
-	SWAP_BUFFERS,
-	BARRIER_WAIT,
-	LUA_EXEC,
-	TIMER_TICK_SLEEP,
-
-	COUNT
-};
-
-/// Trace counter type.
-enum class TraceCounterType
-{
-	GR_DRAWCALLS,
-	GR_VERTICES,
-	GL_PROGS_SKIPPED,
-	VK_PIPELINE_BARRIERS,
-	VK_CMD_BUFFER_CREATE,
-	VK_FENCE_CREATE,
-	VK_SEMAPHORE_CREATE,
-	VK_DESCRIPTOR_POOL_CREATE,
-	VK_DESCRIPTOR_SET_CREATE,
-	VK_PIPELINE_CREATE,
-	RENDERER_LIGHTS,
-	RENDERER_SHADOW_PASSES,
-	RENDERER_MERGED_DRAWCALLS,
-	RENDERER_REFLECTIONS,
-	RESOURCE_ASYNC_TASKS,
-	SCENE_NODES_UPDATED,
-	STAGING_UNIFORMS_SIZE,
-	STAGING_STORAGE_SIZE,
-
-	COUNT
-};
-
-/// Trace manager.
-class TraceManager
-{
-public:
-	TraceManager()
-	{
-	}
-
-	~TraceManager();
-
-	ANKI_USE_RESULT Error create(HeapAllocator<U8> alloc, const CString& cacheDir);
-
-	void startEvent();
-
-	void stopEvent(TraceEventType type);
-
-	void incCounter(TraceCounterType c, U64 val)
-	{
-		if(!m_disabled)
-		{
-			m_perFrameCounters[U(c)].fetchAdd(val);
-		}
-	}
-
-	void startFrame();
-
-	void stopFrame();
-
-private:
-	class Entry
-	{
-	public:
-		TraceEventType m_event;
-		Second m_timestamp; ///< When it started.
-		Second m_duration;
-		ThreadId m_tid;
-	};
-
-	static const U BUFFERED_ENTRIES = 1024 * 20;
-	Array<Entry, BUFFERED_ENTRIES> m_entries;
-	Atomic<U32> m_count = {0};
-	File m_traceFile;
-	File m_perFrameFile;
-	File m_perRunFile;
-	Second m_startFrameTime;
-
-	Array<Atomic<U64>, U(TraceEventType::COUNT) + U(TraceCounterType::COUNT)> m_perFrameCounters = {{}};
-	Array<Atomic<U64>, U(TraceEventType::COUNT) + U(TraceCounterType::COUNT)> m_perRunCounters = {{}};
-
-	Bool m_disabled = false;
-
-	ANKI_USE_RESULT Error flushCounters();
-	ANKI_USE_RESULT Error flushEvents();
-};
-
-class ScopedTraceManagerEvent
-{
-public:
-	ScopedTraceManagerEvent(TraceManager* manager, TraceEventType type)
-		: m_manager(manager)
-		, m_type(type)
-	{
-		m_manager->startEvent();
-	}
-
-	~ScopedTraceManagerEvent()
-	{
-		m_manager->stopEvent(m_type);
-	}
-
-private:
-	TraceManager* m_manager;
-	TraceEventType m_type;
-};
-
-using TraceManagerSingleton = Singleton<TraceManager>;
+#include <anki/util/Tracer.h>
 
 
 /// @name Trace macros.
 /// @name Trace macros.
 /// @{
 /// @{
 #if ANKI_ENABLE_TRACE
 #if ANKI_ENABLE_TRACE
-#	define ANKI_TRACE_START_EVENT(name_) TraceManagerSingleton::get().startEvent()
-#	define ANKI_TRACE_STOP_EVENT(name_) TraceManagerSingleton::get().stopEvent(TraceEventType::name_)
-#	define ANKI_TRACE_SCOPED_EVENT(name_) \
-		ScopedTraceManagerEvent _tse##name_(&TraceManagerSingleton::get(), TraceEventType::name_)
-#	define ANKI_TRACE_INC_COUNTER(name_, val_) TraceManagerSingleton::get().incCounter(TraceCounterType::name_, val_)
-#	define ANKI_TRACE_START_FRAME() TraceManagerSingleton::get().startFrame()
-#	define ANKI_TRACE_STOP_FRAME() TraceManagerSingleton::get().stopFrame()
+#	define ANKI_TRACE_START_EVENT(name_) TracerEventHandle _teh##name_ = TracerSingleton::get().beginEvent()
+#	define ANKI_TRACE_STOP_EVENT(name_) TracerSingleton::get().endEvent(#	name_, _teh##name_)
+#	define ANKI_TRACE_SCOPED_EVENT(name_) TraceScopedEvent _tse##name_(#	name_)
+#	define ANKI_TRACE_INC_COUNTER(name_, val_) TracerSingleton::get().increaseCounter(#	name_, val_)
 #else
 #else
 #	define ANKI_TRACE_START_EVENT(name_) ((void)0)
 #	define ANKI_TRACE_START_EVENT(name_) ((void)0)
 #	define ANKI_TRACE_STOP_EVENT(name_) ((void)0)
 #	define ANKI_TRACE_STOP_EVENT(name_) ((void)0)
 #	define ANKI_TRACE_SCOPED_EVENT(name_) ((void)0)
 #	define ANKI_TRACE_SCOPED_EVENT(name_) ((void)0)
 #	define ANKI_TRACE_INC_COUNTER(name_, val_) ((void)0)
 #	define ANKI_TRACE_INC_COUNTER(name_, val_) ((void)0)
-#	define ANKI_TRACE_START_FRAME() ((void)0)
-#	define ANKI_TRACE_STOP_FRAME() ((void)0)
 #endif
 #endif
 /// @}
 /// @}
-
-} // end namespace anki

+ 1 - 3
src/anki/gr/gl/CommandBufferImpl.cpp

@@ -43,7 +43,7 @@ void CommandBufferImpl::init(const CommandBufferInitInfo& init)
 
 
 void CommandBufferImpl::destroy()
 void CommandBufferImpl::destroy()
 {
 {
-	ANKI_TRACE_START_EVENT(GL_CMD_BUFFER_DESTROY);
+	ANKI_TRACE_SCOPED_EVENT(GL_CMD_BUFFER_DESTROY);
 
 
 #if ANKI_EXTRA_CHECKS
 #if ANKI_EXTRA_CHECKS
 	if(!m_executed && m_firstCommand)
 	if(!m_executed && m_firstCommand)
@@ -64,8 +64,6 @@ void CommandBufferImpl::destroy()
 				&& "Someone is holding a reference to the command buffer's allocator");
 				&& "Someone is holding a reference to the command buffer's allocator");
 
 
 	m_alloc = CommandBufferAllocator<U8>();
 	m_alloc = CommandBufferAllocator<U8>();
-
-	ANKI_TRACE_STOP_EVENT(GL_CMD_BUFFER_DESTROY);
 }
 }
 
 
 Error CommandBufferImpl::executeAllCommands()
 Error CommandBufferImpl::executeAllCommands()

+ 7 - 8
src/anki/gr/gl/RenderingThread.cpp

@@ -248,9 +248,11 @@ void RenderingThread::threadLoop()
 			++m_head;
 			++m_head;
 		}
 		}
 
 
-		ANKI_TRACE_START_EVENT(GL_THREAD);
-		Error err = static_cast<CommandBufferImpl&>(*cmd).executeAllCommands();
-		ANKI_TRACE_STOP_EVENT(GL_THREAD);
+		Error err = Error::NONE;
+		{
+			ANKI_TRACE_SCOPED_EVENT(GL_THREAD);
+			err = static_cast<CommandBufferImpl&>(*cmd).executeAllCommands();
+		}
 
 
 		if(err)
 		if(err)
 		{
 		{
@@ -274,7 +276,7 @@ void RenderingThread::syncClientServer()
 
 
 void RenderingThread::swapBuffersInternal()
 void RenderingThread::swapBuffersInternal()
 {
 {
-	ANKI_TRACE_START_EVENT(SWAP_BUFFERS);
+	ANKI_TRACE_SCOPED_EVENT(SWAP_BUFFERS);
 
 
 	// Do the swap buffers
 	// Do the swap buffers
 	m_manager->swapBuffers();
 	m_manager->swapBuffers();
@@ -286,13 +288,11 @@ void RenderingThread::swapBuffersInternal()
 
 
 		m_frameCondVar.notifyOne();
 		m_frameCondVar.notifyOne();
 	}
 	}
-
-	ANKI_TRACE_STOP_EVENT(SWAP_BUFFERS);
 }
 }
 
 
 void RenderingThread::swapBuffers()
 void RenderingThread::swapBuffers()
 {
 {
-	ANKI_TRACE_START_EVENT(SWAP_BUFFERS);
+	ANKI_TRACE_SCOPED_EVENT(SWAP_BUFFERS);
 	// Wait for the rendering thread to finish swap buffers...
 	// Wait for the rendering thread to finish swap buffers...
 	{
 	{
 		LockGuard<Mutex> lock(m_frameMtx);
 		LockGuard<Mutex> lock(m_frameMtx);
@@ -306,7 +306,6 @@ void RenderingThread::swapBuffers()
 
 
 	// ...and then flush a new swap buffers
 	// ...and then flush a new swap buffers
 	flushCommandBuffer(m_swapBuffersCommands, nullptr);
 	flushCommandBuffer(m_swapBuffersCommands, nullptr);
-	ANKI_TRACE_STOP_EVENT(SWAP_BUFFERS);
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 4 - 3
src/anki/gr/vulkan/BufferImpl.cpp

@@ -133,9 +133,10 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, req.alignment, true, m_memHandle);
 	getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, req.alignment, true, m_memHandle);
 
 
 	// Bind mem to buffer
 	// Bind mem to buffer
-	ANKI_TRACE_START_EVENT(VK_BIND_OBJECT);
-	ANKI_VK_CHECK(vkBindBufferMemory(getDevice(), m_handle, m_memHandle.m_memory, m_memHandle.m_offset));
-	ANKI_TRACE_STOP_EVENT(VK_BIND_OBJECT);
+	{
+		ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
+		ANKI_VK_CHECK(vkBindBufferMemory(getDevice(), m_handle, m_memHandle.m_memory, m_memHandle.m_offset));
+	}
 
 
 	m_access = access;
 	m_access = access;
 	m_size = inf.m_size;
 	m_size = inf.m_size;

+ 1 - 3
src/anki/gr/vulkan/DescriptorSet.cpp

@@ -621,14 +621,13 @@ Error DescriptorSetFactory::newDescriptorSet(ThreadId tid,
 	Array<U32, MAX_UNIFORM_BUFFER_BINDINGS + MAX_STORAGE_BUFFER_BINDINGS>& dynamicOffsets,
 	Array<U32, MAX_UNIFORM_BUFFER_BINDINGS + MAX_STORAGE_BUFFER_BINDINGS>& dynamicOffsets,
 	U& dynamicOffsetCount)
 	U& dynamicOffsetCount)
 {
 {
-	ANKI_TRACE_START_EVENT(VK_DESCRIPTOR_SET_GET_OR_CREATE);
+	ANKI_TRACE_SCOPED_EVENT(VK_DESCRIPTOR_SET_GET_OR_CREATE);
 
 
 	U64 hash;
 	U64 hash;
 	state.flush(dirty, hash, dynamicOffsets, dynamicOffsetCount);
 	state.flush(dirty, hash, dynamicOffsets, dynamicOffsetCount);
 
 
 	if(!dirty)
 	if(!dirty)
 	{
 	{
-		ANKI_TRACE_STOP_EVENT(VK_DESCRIPTOR_SET_GET_OR_CREATE);
 		return Error::NONE;
 		return Error::NONE;
 	}
 	}
 
 
@@ -645,7 +644,6 @@ Error DescriptorSetFactory::newDescriptorSet(ThreadId tid,
 	set.m_handle = s->m_handle;
 	set.m_handle = s->m_handle;
 	ANKI_ASSERT(set.m_handle != VK_NULL_HANDLE);
 	ANKI_ASSERT(set.m_handle != VK_NULL_HANDLE);
 
 
-	ANKI_TRACE_STOP_EVENT(VK_DESCRIPTOR_SET_GET_OR_CREATE);
 	return Error::NONE;
 	return Error::NONE;
 }
 }
 
 

+ 15 - 11
src/anki/gr/vulkan/GrManagerImpl.cpp

@@ -684,14 +684,15 @@ void GrManagerImpl::beginFrame()
 
 
 	// Get new image
 	// Get new image
 	uint32_t imageIdx;
 	uint32_t imageIdx;
-	ANKI_TRACE_START_EVENT(VK_ACQUIRE_IMAGE);
-	ANKI_VK_CHECKF(vkAcquireNextImageKHR(m_device,
-		m_crntSwapchain->m_swapchain,
-		UINT64_MAX,
-		frame.m_acquireSemaphore->getHandle(),
-		fence->getHandle(),
-		&imageIdx));
-	ANKI_TRACE_STOP_EVENT(VK_ACQUIRE_IMAGE);
+	{
+		ANKI_TRACE_SCOPED_EVENT(VK_ACQUIRE_IMAGE);
+		ANKI_VK_CHECKF(vkAcquireNextImageKHR(m_device,
+			m_crntSwapchain->m_swapchain,
+			UINT64_MAX,
+			frame.m_acquireSemaphore->getHandle(),
+			fence->getHandle(),
+			&imageIdx));
+	}
 
 
 	ANKI_ASSERT(imageIdx < MAX_FRAMES_IN_FLIGHT);
 	ANKI_ASSERT(imageIdx < MAX_FRAMES_IN_FLIGHT);
 	m_crntSwapchain->m_currentBackbufferIndex = imageIdx;
 	m_crntSwapchain->m_currentBackbufferIndex = imageIdx;
@@ -699,6 +700,8 @@ void GrManagerImpl::beginFrame()
 
 
 void GrManagerImpl::endFrame()
 void GrManagerImpl::endFrame()
 {
 {
+	ANKI_TRACE_SCOPED_EVENT(VK_PRESENT);
+
 	LockGuard<Mutex> lock(m_globalMtx);
 	LockGuard<Mutex> lock(m_globalMtx);
 
 
 	PerFrame& frame = m_perFrame[m_frame % MAX_FRAMES_IN_FLIGHT];
 	PerFrame& frame = m_perFrame[m_frame % MAX_FRAMES_IN_FLIGHT];
@@ -804,9 +807,10 @@ void GrManagerImpl::flushCommandBuffer(CommandBufferPtr cmdb, FencePtr* outFence
 
 
 	impl.setFence(fence);
 	impl.setFence(fence);
 
 
-	ANKI_TRACE_START_EVENT(VK_QUEUE_SUBMIT);
-	ANKI_VK_CHECKF(vkQueueSubmit(m_queue, 1, &submit, fence->getHandle()));
-	ANKI_TRACE_STOP_EVENT(VK_QUEUE_SUBMIT);
+	{
+		ANKI_TRACE_SCOPED_EVENT(VK_QUEUE_SUBMIT);
+		ANKI_VK_CHECKF(vkQueueSubmit(m_queue, 1, &submit, fence->getHandle()));
+	}
 
 
 	if(wait)
 	if(wait)
 	{
 	{

+ 5 - 3
src/anki/gr/vulkan/Pipeline.cpp

@@ -446,9 +446,11 @@ void PipelineFactory::newPipeline(PipelineStateTracker& state, Pipeline& ppline,
 		const VkGraphicsPipelineCreateInfo& ci = state.updatePipelineCreateInfo();
 		const VkGraphicsPipelineCreateInfo& ci = state.updatePipelineCreateInfo();
 		pp.m_fb = state.getFb();
 		pp.m_fb = state.getFb();
 
 
-		ANKI_TRACE_START_EVENT(VK_PIPELINE_CREATE);
-		ANKI_VK_CHECKF(vkCreateGraphicsPipelines(m_dev, m_pplineCache, 1, &ci, nullptr, &pp.m_handle));
-		ANKI_TRACE_STOP_EVENT(VK_PIPELINE_CREATE);
+		{
+			ANKI_TRACE_SCOPED_EVENT(VK_PIPELINE_CREATE);
+			ANKI_VK_CHECKF(vkCreateGraphicsPipelines(m_dev, m_pplineCache, 1, &ci, nullptr, &pp.m_handle));
+		}
+
 		ANKI_TRACE_INC_COUNTER(VK_PIPELINE_CREATE, 1);
 		ANKI_TRACE_INC_COUNTER(VK_PIPELINE_CREATE, 1);
 
 
 		m_pplines.emplace(m_alloc, hash, pp);
 		m_pplines.emplace(m_alloc, hash, pp);

+ 2 - 4
src/anki/gr/vulkan/TextureImpl.cpp

@@ -327,9 +327,8 @@ Error TextureImpl::initImage(const TextureInitInfo& init_)
 		getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, req.alignment, false, m_memHandle);
 		getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, req.alignment, false, m_memHandle);
 
 
 		// Bind mem to image
 		// Bind mem to image
-		ANKI_TRACE_START_EVENT(VK_BIND_OBJECT);
+		ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
 		ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_memHandle.m_memory, m_memHandle.m_offset));
 		ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_memHandle.m_memory, m_memHandle.m_offset));
-		ANKI_TRACE_STOP_EVENT(VK_BIND_OBJECT);
 	}
 	}
 	else
 	else
 	{
 	{
@@ -347,9 +346,8 @@ Error TextureImpl::initImage(const TextureInitInfo& init_)
 		getGrManagerImpl().trySetVulkanHandleName(
 		getGrManagerImpl().trySetVulkanHandleName(
 			init.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, ptrToNumber(m_dedicatedMem));
 			init.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, ptrToNumber(m_dedicatedMem));
 
 
-		ANKI_TRACE_START_EVENT(VK_BIND_OBJECT);
+		ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
 		ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_dedicatedMem, 0));
 		ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_dedicatedMem, 0));
-		ANKI_TRACE_STOP_EVENT(VK_BIND_OBJECT);
 	}
 	}
 
 
 	return Error::NONE;
 	return Error::NONE;

+ 109 - 36
src/anki/renderer/DownscaleBlur.cpp

@@ -34,26 +34,49 @@ Error DownscaleBlur::initInternal(const ConfigSet&)
 	// Create the mipmapped texture
 	// Create the mipmapped texture
 	TextureInitInfo texinit = m_r->create2DRenderTargetDescription(
 	TextureInitInfo texinit = m_r->create2DRenderTargetDescription(
 		m_r->getWidth() / 2, m_r->getHeight() / 2, LIGHT_SHADING_COLOR_ATTACHMENT_PIXEL_FORMAT, "DownscaleBlur");
 		m_r->getWidth() / 2, m_r->getHeight() / 2, LIGHT_SHADING_COLOR_ATTACHMENT_PIXEL_FORMAT, "DownscaleBlur");
-	texinit.m_usage = TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE
-					  | TextureUsageBit::SAMPLED_COMPUTE;
+	texinit.m_usage = TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::SAMPLED_COMPUTE
+					  | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE; // TODO remove FB_WRITE
+	if(m_useCompute)
+	{
+		texinit.m_usage |= TextureUsageBit::SAMPLED_COMPUTE | TextureUsageBit::IMAGE_COMPUTE_WRITE;
+	}
+	else
+	{
+		texinit.m_usage |= TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
+	}
 	texinit.m_mipmapCount = m_passCount;
 	texinit.m_mipmapCount = m_passCount;
 	texinit.m_initialUsage = TextureUsageBit::SAMPLED_COMPUTE;
 	texinit.m_initialUsage = TextureUsageBit::SAMPLED_COMPUTE;
 	m_rtTex = m_r->createAndClearRenderTarget(texinit);
 	m_rtTex = m_r->createAndClearRenderTarget(texinit);
 
 
 	// FB descr
 	// FB descr
-	m_fbDescrs.create(getAllocator(), m_passCount);
-	for(U pass = 0; pass < m_passCount; ++pass)
+	if(!m_useCompute)
 	{
 	{
-		m_fbDescrs[pass].m_colorAttachmentCount = 1;
-		m_fbDescrs[pass].m_colorAttachments[0].m_loadOperation = AttachmentLoadOperation::DONT_CARE;
-		m_fbDescrs[pass].m_colorAttachments[0].m_surface.m_level = pass;
-		m_fbDescrs[pass].bake();
+		m_fbDescrs.create(getAllocator(), m_passCount);
+		for(U pass = 0; pass < m_passCount; ++pass)
+		{
+			m_fbDescrs[pass].m_colorAttachmentCount = 1;
+			m_fbDescrs[pass].m_colorAttachments[0].m_loadOperation = AttachmentLoadOperation::DONT_CARE;
+			m_fbDescrs[pass].m_colorAttachments[0].m_surface.m_level = pass;
+			m_fbDescrs[pass].bake();
+		}
 	}
 	}
 
 
 	// Shader programs
 	// Shader programs
-	ANKI_CHECK(getResourceManager().loadResource("programs/DownscaleBlur.ankiprog", m_prog));
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variant);
+	const ShaderProgramResourceVariant* variant = nullptr;
+	if(m_useCompute)
+	{
+		ANKI_CHECK(getResourceManager().loadResource("programs/DownscaleBlurCompute.ankiprog", m_prog));
+
+		ShaderProgramResourceConstantValueInitList<1> consts(m_prog);
+		consts.add("WORKGROUP_SIZE", UVec2(m_workgroupSize[0], m_workgroupSize[1]));
+
+		m_prog->getOrCreateVariant(consts.get(), variant);
+	}
+	else
+	{
+		ANKI_CHECK(getResourceManager().loadResource("programs/DownscaleBlur.ankiprog", m_prog));
+		m_prog->getOrCreateVariant(variant);
+	}
 	m_grProg = variant->getProgram();
 	m_grProg = variant->getProgram();
 
 
 	return Error::NONE;
 	return Error::NONE;
@@ -79,35 +102,68 @@ void DownscaleBlur::populateRenderGraph(RenderingContext& ctx)
 		"Down/Blur #5",
 		"Down/Blur #5",
 		"Down/Blur #6",
 		"Down/Blur #6",
 		"Down/Blur #7"}};
 		"Down/Blur #7"}};
-	for(U i = 0; i < m_passCount; ++i)
+	if(m_useCompute)
 	{
 	{
-		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passNames[i]);
-		pass.setWork(runCallback, this, 0);
-
-		if(i > 0)
+		for(U i = 0; i < m_passCount; ++i)
 		{
 		{
-			TextureSubresourceInfo sampleSubresource;
-			TextureSubresourceInfo renderSubresource;
+			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passNames[i]);
+			pass.setWork(runCallback, this, 0);
+
+			if(i > 0)
+			{
+				TextureSubresourceInfo sampleSubresource;
+				TextureSubresourceInfo renderSubresource;
 
 
-			sampleSubresource.m_firstMipmap = i - 1;
-			renderSubresource.m_firstMipmap = i;
+				sampleSubresource.m_firstMipmap = i - 1;
+				renderSubresource.m_firstMipmap = i;
 
 
-			pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
-			pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::SAMPLED_FRAGMENT, sampleSubresource});
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::SAMPLED_COMPUTE, sampleSubresource});
 
 
-			pass.newProducer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+				pass.newProducer({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
+			}
+			else
+			{
+				TextureSubresourceInfo renderSubresource;
+
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
+				pass.newConsumer({m_r->getTemporalAA().getRt(), TextureUsageBit::SAMPLED_COMPUTE});
+
+				pass.newProducer({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
+			}
 		}
 		}
-		else
+	}
+	else
+	{
+		for(U i = 0; i < m_passCount; ++i)
 		{
 		{
-			TextureSubresourceInfo renderSubresource;
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passNames[i]);
+			pass.setWork(runCallback, this, 0);
+			pass.setFramebufferInfo(m_fbDescrs[i], {{m_runCtx.m_rt}}, {});
 
 
-			pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
-			pass.newConsumer({m_r->getTemporalAA().getRt(), TextureUsageBit::SAMPLED_FRAGMENT});
+			if(i > 0)
+			{
+				TextureSubresourceInfo sampleSubresource;
+				TextureSubresourceInfo renderSubresource;
 
 
-			pass.newProducer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
-		}
+				sampleSubresource.m_firstMipmap = i - 1;
+				renderSubresource.m_firstMipmap = i;
+
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::SAMPLED_FRAGMENT, sampleSubresource});
+
+				pass.newProducer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+			}
+			else
+			{
+				TextureSubresourceInfo renderSubresource;
 
 
-		pass.setFramebufferInfo(m_fbDescrs[i], {{m_runCtx.m_rt}}, {});
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+				pass.newConsumer({m_r->getTemporalAA().getRt(), TextureUsageBit::SAMPLED_FRAGMENT});
+
+				pass.newProducer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+			}
+		}
 	}
 	}
 }
 }
 
 
@@ -115,15 +171,16 @@ void DownscaleBlur::run(RenderPassWorkContext& rgraphCtx)
 {
 {
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
 
+	cmdb->bindShaderProgram(m_grProg);
+
 	const U passIdx = m_runCtx.m_crntPassIdx++;
 	const U passIdx = m_runCtx.m_crntPassIdx++;
+	const U vpWidth = m_rtTex->getWidth() >> passIdx;
+	const U vpHeight = m_rtTex->getHeight() >> passIdx;
 
 
 	if(passIdx > 0)
 	if(passIdx > 0)
 	{
 	{
-		// Bind the Rt
-
 		TextureSubresourceInfo sampleSubresource;
 		TextureSubresourceInfo sampleSubresource;
 		sampleSubresource.m_firstMipmap = passIdx - 1;
 		sampleSubresource.m_firstMipmap = passIdx - 1;
-
 		rgraphCtx.bindTextureAndSampler(0, 0, m_runCtx.m_rt, sampleSubresource, m_r->getLinearSampler());
 		rgraphCtx.bindTextureAndSampler(0, 0, m_runCtx.m_rt, sampleSubresource, m_r->getLinearSampler());
 	}
 	}
 	else
 	else
@@ -131,9 +188,25 @@ void DownscaleBlur::run(RenderPassWorkContext& rgraphCtx)
 		rgraphCtx.bindColorTextureAndSampler(0, 0, m_r->getTemporalAA().getRt(), m_r->getLinearSampler());
 		rgraphCtx.bindColorTextureAndSampler(0, 0, m_r->getTemporalAA().getRt(), m_r->getLinearSampler());
 	}
 	}
 
 
-	cmdb->setViewport(0, 0, m_rtTex->getWidth() >> passIdx, m_rtTex->getHeight() >> passIdx);
-	cmdb->bindShaderProgram(m_grProg);
-	drawQuad(cmdb);
+	if(m_useCompute)
+	{
+		TextureSubresourceInfo sampleSubresource;
+		sampleSubresource.m_firstMipmap = passIdx;
+		rgraphCtx.bindImage(0, 0, m_runCtx.m_rt, sampleSubresource);
+
+		Vec4 fbSize(vpWidth, vpHeight, 0.0f, 0.0f);
+		cmdb->setPushConstants(&fbSize, sizeof(fbSize));
+	}
+
+	if(m_useCompute)
+	{
+		dispatchPPCompute(cmdb, m_workgroupSize[0], m_workgroupSize[1], vpWidth, vpHeight);
+	}
+	else
+	{
+		cmdb->setViewport(0, 0, vpWidth, vpHeight);
+		drawQuad(cmdb);
+	}
 }
 }
 
 
 } // end namespace anki
 } // end namespace anki

+ 3 - 0
src/anki/renderer/DownscaleBlur.h

@@ -53,6 +53,9 @@ anki_internal:
 	}
 	}
 
 
 private:
 private:
+	static const Bool m_useCompute = false;
+	Array<U32, 2> m_workgroupSize = {{8, 8}};
+
 	U8 m_passCount = 0; ///< It's also the mip count of the m_rtTex.
 	U8 m_passCount = 0; ///< It's also the mip count of the m_rtTex.
 
 
 	TexturePtr m_rtTex;
 	TexturePtr m_rtTex;

+ 1 - 1
src/anki/renderer/Drawer.cpp

@@ -79,7 +79,7 @@ void RenderableDrawer::flushDrawcall(DrawContext& ctx)
 	// Rendered something, reset the cached transforms
 	// Rendered something, reset the cached transforms
 	if(ctx.m_cachedRenderElementCount > 1)
 	if(ctx.m_cachedRenderElementCount > 1)
 	{
 	{
-		ANKI_TRACE_INC_COUNTER(RENDERER_MERGED_DRAWCALLS, ctx.m_cachedRenderElementCount - 1);
+		ANKI_TRACE_INC_COUNTER(R_MERGED_DRAWCALLS, ctx.m_cachedRenderElementCount - 1);
 	}
 	}
 	ctx.m_cachedRenderElementCount = 0;
 	ctx.m_cachedRenderElementCount = 0;
 }
 }

+ 2 - 2
src/anki/renderer/GBuffer.cpp

@@ -70,7 +70,7 @@ Error GBuffer::initInternal(const ConfigSet& initializer)
 
 
 void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) const
 void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) const
 {
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDER_MS);
+	ANKI_TRACE_SCOPED_EVENT(R_MS);
 
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	const U threadId = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
 	const U threadId = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
@@ -136,7 +136,7 @@ void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rg
 
 
 void GBuffer::populateRenderGraph(RenderingContext& ctx)
 void GBuffer::populateRenderGraph(RenderingContext& ctx)
 {
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDER_MS);
+	ANKI_TRACE_SCOPED_EVENT(R_MS);
 
 
 	m_ctx = &ctx;
 	m_ctx = &ctx;
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;

+ 5 - 5
src/anki/renderer/Indirect.cpp

@@ -348,7 +348,7 @@ void Indirect::prepareProbes(
 void Indirect::runGBuffer(CommandBufferPtr& cmdb)
 void Indirect::runGBuffer(CommandBufferPtr& cmdb)
 {
 {
 	ANKI_ASSERT(m_ctx.m_probe);
 	ANKI_ASSERT(m_ctx.m_probe);
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 	const ReflectionProbeQueueElement& probe = *m_ctx.m_probe;
 	const ReflectionProbeQueueElement& probe = *m_ctx.m_probe;
 
 
 	// For each face
 	// For each face
@@ -404,7 +404,7 @@ void Indirect::bindVertexIndexBuffers(MeshResourcePtr& mesh, CommandBufferPtr& c
 void Indirect::runLightShading(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 void Indirect::runLightShading(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 {
 {
 	ANKI_ASSERT(faceIdx <= 6);
 	ANKI_ASSERT(faceIdx <= 6);
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
 
@@ -525,7 +525,7 @@ void Indirect::runMipmappingOfLightShading(U32 faceIdx, RenderPassWorkContext& r
 	ANKI_ASSERT(faceIdx < 6);
 	ANKI_ASSERT(faceIdx < 6);
 	ANKI_ASSERT(m_ctx.m_cacheEntryIdx < m_cacheEntries.getSize());
 	ANKI_ASSERT(m_ctx.m_cacheEntryIdx < m_cacheEntries.getSize());
 
 
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 
 
 	TextureSubresourceInfo subresource(TextureSurfaceInfo(0, 0, faceIdx, m_ctx.m_cacheEntryIdx));
 	TextureSubresourceInfo subresource(TextureSurfaceInfo(0, 0, faceIdx, m_ctx.m_cacheEntryIdx));
 	subresource.m_mipmapCount = m_lightShading.m_mipCount;
 	subresource.m_mipmapCount = m_lightShading.m_mipCount;
@@ -541,7 +541,7 @@ void Indirect::runMipmappingOfLightShading(U32 faceIdx, RenderPassWorkContext& r
 void Indirect::runIrradiance(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 void Indirect::runIrradiance(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 {
 {
 	ANKI_ASSERT(faceIdx < 6);
 	ANKI_ASSERT(faceIdx < 6);
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 	const U32 cacheEntryIdx = m_ctx.m_cacheEntryIdx;
 	const U32 cacheEntryIdx = m_ctx.m_cacheEntryIdx;
 	ANKI_ASSERT(cacheEntryIdx < m_cacheEntries.getSize());
 	ANKI_ASSERT(cacheEntryIdx < m_cacheEntries.getSize());
 
 
@@ -563,7 +563,7 @@ void Indirect::runIrradiance(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 
 
 void Indirect::populateRenderGraph(RenderingContext& rctx)
 void Indirect::populateRenderGraph(RenderingContext& rctx)
 {
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 
 
 #if ANKI_EXTRA_CHECKS
 #if ANKI_EXTRA_CHECKS
 	m_ctx = {};
 	m_ctx = {};

+ 132 - 126
src/anki/renderer/LightBin.cpp

@@ -19,13 +19,13 @@ const U SIZE_IDX_COUNT = 4;
 // Shader structs and block representations. All positions and directions in viewspace
 // Shader structs and block representations. All positions and directions in viewspace
 // For documentation see the shaders
 // For documentation see the shaders
 
 
-class ShaderCluster
+class LightBin::ShaderCluster
 {
 {
 public:
 public:
 	U32 m_firstIdx;
 	U32 m_firstIdx;
 };
 };
 
 
-class ShaderPointLight
+class LightBin::ShaderPointLight
 {
 {
 public:
 public:
 	Vec4 m_posRadius;
 	Vec4 m_posRadius;
@@ -34,7 +34,7 @@ public:
 	UVec2 m_atlasTiles;
 	UVec2 m_atlasTiles;
 };
 };
 
 
-class ShaderSpotLight
+class LightBin::ShaderSpotLight
 {
 {
 public:
 public:
 	Vec4 m_posRadius;
 	Vec4 m_posRadius;
@@ -44,7 +44,7 @@ public:
 	Mat4 m_texProjectionMat; ///< Texture projection matrix
 	Mat4 m_texProjectionMat; ///< Texture projection matrix
 };
 };
 
 
-class ShaderProbe
+class LightBin::ShaderProbe
 {
 {
 public:
 public:
 	Vec3 m_pos;
 	Vec3 m_pos;
@@ -59,7 +59,7 @@ public:
 	}
 	}
 };
 };
 
 
-class ShaderDecal
+class LightBin::ShaderDecal
 {
 {
 public:
 public:
 	Vec4 m_diffUv;
 	Vec4 m_diffUv;
@@ -73,7 +73,7 @@ static const U MAX_PROBES_PER_CLUSTER = 12;
 static const U MAX_DECALS_PER_CLUSTER = 8;
 static const U MAX_DECALS_PER_CLUSTER = 8;
 static const F32 INVALID_TEXTURE_INDEX = -1.0;
 static const F32 INVALID_TEXTURE_INDEX = -1.0;
 
 
-class ClusterLightIndex
+class LightBin::ClusterLightIndex
 {
 {
 public:
 public:
 	ClusterLightIndex()
 	ClusterLightIndex()
@@ -92,23 +92,24 @@ public:
 		m_index = i;
 		m_index = i;
 	}
 	}
 
 
+	friend Bool operator<(const ClusterLightIndex& a, const ClusterLightIndex& b)
+	{
+		return a.getIndex() < b.getIndex();
+	}
+
 private:
 private:
 	U16 m_index;
 	U16 m_index;
 };
 };
 
 
-static Bool operator<(const ClusterLightIndex& a, const ClusterLightIndex& b)
-{
-	return a.getIndex() < b.getIndex();
-}
-
 /// Store the probe radius for sorting the indices.
 /// Store the probe radius for sorting the indices.
 /// WARNING: Keep it as small as possible, that's why the members are U16
 /// WARNING: Keep it as small as possible, that's why the members are U16
-class ClusterProbeIndex
+class LightBin::ClusterProbeIndex
 {
 {
 public:
 public:
 	ClusterProbeIndex()
 	ClusterProbeIndex()
 	{
 	{
 		// Do nothing. No need to initialize
 		// Do nothing. No need to initialize
+		static_assert(sizeof(ClusterProbeIndex) == sizeof(U16) * 2, "Because we memcmp");
 	}
 	}
 
 
 	U getIndex() const
 	U getIndex() const
@@ -144,10 +145,9 @@ private:
 	U16 m_index;
 	U16 m_index;
 	U16 m_probeRadius;
 	U16 m_probeRadius;
 };
 };
-static_assert(sizeof(ClusterProbeIndex) == sizeof(U16) * 2, "Because we memcmp");
 
 
 /// WARNING: Keep it as small as possible. The number of clusters is huge
 /// WARNING: Keep it as small as possible. The number of clusters is huge
-class alignas(U32) ClusterData
+class alignas(U32) LightBin::ClusterData
 {
 {
 public:
 public:
 	Atomic<U8> m_pointCount;
 	Atomic<U8> m_pointCount;
@@ -271,10 +271,10 @@ private:
 };
 };
 
 
 /// Common data for all tasks.
 /// Common data for all tasks.
-class LightBinContext
+class LightBin::BinContext
 {
 {
 public:
 public:
-	LightBinContext(StackAllocator<U8> alloc)
+	BinContext(StackAllocator<U8> alloc)
 		: m_alloc(alloc)
 		: m_alloc(alloc)
 		, m_tempClusters(alloc)
 		, m_tempClusters(alloc)
 	{
 	{
@@ -326,10 +326,10 @@ public:
 };
 };
 
 
 /// Write the lights to the GPU buffers.
 /// Write the lights to the GPU buffers.
-class WriteLightsTask : public ThreadPoolTask
+class LightBin::WriteLightsTask : public ThreadPoolTask
 {
 {
 public:
 public:
-	LightBinContext* m_ctx = nullptr;
+	BinContext* m_ctx = nullptr;
 
 
 	Error operator()(U32 threadId, PtrSize threadsCount)
 	Error operator()(U32 threadId, PtrSize threadsCount)
 	{
 	{
@@ -367,7 +367,7 @@ Error LightBin::bin(const Mat4& viewMat,
 	Bool shadowsEnabled,
 	Bool shadowsEnabled,
 	LightBinOut& out)
 	LightBinOut& out)
 {
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDERER_LIGHT_BINNING);
+	ANKI_TRACE_SCOPED_EVENT(R_LIGHT_BINNING);
 
 
 	// Prepare the clusterer
 	// Prepare the clusterer
 	ClustererPrepareInfo pinf;
 	ClustererPrepareInfo pinf;
@@ -387,13 +387,13 @@ Error LightBin::bin(const Mat4& viewMat,
 	const U visibleProbeCount = rqueue.m_reflectionProbes.getSize();
 	const U visibleProbeCount = rqueue.m_reflectionProbes.getSize();
 	const U visibleDecalCount = rqueue.m_decals.getSize();
 	const U visibleDecalCount = rqueue.m_decals.getSize();
 
 
-	ANKI_TRACE_INC_COUNTER(RENDERER_LIGHTS, visiblePointLightsCount + visibleSpotLightsCount);
+	ANKI_TRACE_INC_COUNTER(R_LIGHTS, visiblePointLightsCount + visibleSpotLightsCount);
 
 
 	//
 	//
 	// Write the lights and tiles UBOs
 	// Write the lights and tiles UBOs
 	//
 	//
 	Array<WriteLightsTask, ThreadPool::MAX_THREADS> tasks;
 	Array<WriteLightsTask, ThreadPool::MAX_THREADS> tasks;
-	LightBinContext ctx(frameAlloc);
+	BinContext ctx(frameAlloc);
 	ctx.m_viewMat = viewMat;
 	ctx.m_viewMat = viewMat;
 	ctx.m_viewProjMat = viewProjMat;
 	ctx.m_viewProjMat = viewProjMat;
 	ctx.m_camTrf = camTrf;
 	ctx.m_camTrf = camTrf;
@@ -498,157 +498,163 @@ Error LightBin::bin(const Mat4& viewMat,
 	return Error::NONE;
 	return Error::NONE;
 }
 }
 
 
-void LightBin::binLights(U32 threadId, PtrSize threadsCount, LightBinContext& ctx)
+void LightBin::binLights(U32 threadId, PtrSize threadsCount, BinContext& ctx)
 {
 {
-	ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
 	U clusterCount = m_clusterCount;
 	U clusterCount = m_clusterCount;
 	PtrSize start, end;
 	PtrSize start, end;
 
 
 	//
 	//
 	// Initialize the temp clusters
 	// Initialize the temp clusters
 	//
 	//
-	ThreadPoolTask::choseStartEnd(threadId, threadsCount, clusterCount, start, end);
-
-	for(U i = start; i < end; ++i)
 	{
 	{
-		ctx.m_tempClusters[i].reset();
+		ANKI_TRACE_SCOPED_EVENT(R_LIGHT_BINNING);
+
+		ThreadPoolTask::choseStartEnd(threadId, threadsCount, clusterCount, start, end);
+
+		for(U i = start; i < end; ++i)
+		{
+			ctx.m_tempClusters[i].reset();
+		}
 	}
 	}
 
 
-	ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
 	m_barrier.wait();
 	m_barrier.wait();
-	ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
 
 
 	//
 	//
 	// Iterate lights and probes and bin them
 	// Iterate lights and probes and bin them
 	//
 	//
-	ClustererTestResult testResult;
-	m_clusterer.initTestResults(ctx.m_alloc, testResult);
-	U lightCount = ctx.m_vPointLights.getSize() + ctx.m_vSpotLights.getSize();
-	U totalCount = lightCount + ctx.m_vProbes.getSize() + ctx.m_vDecals.getSize();
-
-	const U TO_BIN_COUNT = 1;
-	while((start = ctx.m_count2.fetchAdd(TO_BIN_COUNT)) < totalCount)
 	{
 	{
-		end = min<U>(start + TO_BIN_COUNT, totalCount);
+		ANKI_TRACE_SCOPED_EVENT(R_LIGHT_BINNING);
 
 
-		for(U j = start; j < end; ++j)
+		ClustererTestResult testResult;
+		m_clusterer.initTestResults(ctx.m_alloc, testResult);
+		U lightCount = ctx.m_vPointLights.getSize() + ctx.m_vSpotLights.getSize();
+		U totalCount = lightCount + ctx.m_vProbes.getSize() + ctx.m_vDecals.getSize();
+
+		const U TO_BIN_COUNT = 1;
+		while((start = ctx.m_count2.fetchAdd(TO_BIN_COUNT)) < totalCount)
 		{
 		{
-			if(j >= lightCount + ctx.m_vDecals.getSize())
-			{
-				U i = j - (lightCount + ctx.m_vDecals.getSize());
-				writeAndBinProbe(ctx.m_vProbes[i], ctx, testResult);
-			}
-			else if(j >= ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize())
-			{
-				U i = j - (ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize());
-				writeAndBinSpotLight(ctx.m_vSpotLights[i], ctx, testResult);
-			}
-			else if(j >= ctx.m_vDecals.getSize())
-			{
-				U i = j - ctx.m_vDecals.getSize();
-				writeAndBinPointLight(ctx.m_vPointLights[i], ctx, testResult);
-			}
-			else
+			end = min<U>(start + TO_BIN_COUNT, totalCount);
+
+			for(U j = start; j < end; ++j)
 			{
 			{
-				U i = j;
-				writeAndBinDecal(ctx.m_vDecals[i], ctx, testResult);
+				if(j >= lightCount + ctx.m_vDecals.getSize())
+				{
+					U i = j - (lightCount + ctx.m_vDecals.getSize());
+					writeAndBinProbe(ctx.m_vProbes[i], ctx, testResult);
+				}
+				else if(j >= ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize())
+				{
+					U i = j - (ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize());
+					writeAndBinSpotLight(ctx.m_vSpotLights[i], ctx, testResult);
+				}
+				else if(j >= ctx.m_vDecals.getSize())
+				{
+					U i = j - ctx.m_vDecals.getSize();
+					writeAndBinPointLight(ctx.m_vPointLights[i], ctx, testResult);
+				}
+				else
+				{
+					U i = j;
+					writeAndBinDecal(ctx.m_vDecals[i], ctx, testResult);
+				}
 			}
 			}
 		}
 		}
 	}
 	}
 
 
+	m_barrier.wait();
+
 	//
 	//
 	// Last thing, update the real clusters
 	// Last thing, update the real clusters
 	//
 	//
-	ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
-	m_barrier.wait();
-	ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
 
 
-	// Run per cluster
-	const U CLUSTER_GROUP = 16;
-	while((start = ctx.m_count.fetchAdd(CLUSTER_GROUP)) < clusterCount)
 	{
 	{
-		end = min<U>(start + CLUSTER_GROUP, clusterCount);
+		ANKI_TRACE_SCOPED_EVENT(R_LIGHT_BINNING);
 
 
-		for(U i = start; i < end; ++i)
+		// Run per cluster
+		const U CLUSTER_GROUP = 16;
+		while((start = ctx.m_count.fetchAdd(CLUSTER_GROUP)) < clusterCount)
 		{
 		{
-			auto& cluster = ctx.m_tempClusters[i];
-			cluster.normalizeCounts();
+			end = min<U>(start + CLUSTER_GROUP, clusterCount);
 
 
-			const U countP = cluster.m_pointCount.get();
-			const U countS = cluster.m_spotCount.get();
-			const U countProbe = cluster.m_probeCount.get();
-			const U countDecal = cluster.m_decalCount.get();
-			const U count = countP + countS + countProbe + countDecal;
-
-			auto& c = ctx.m_clusters[i];
-			c.m_firstIdx = 0; // Point to the first empty indices
-
-			// Early exit
-			if(ANKI_UNLIKELY(count == 0))
+			for(U i = start; i < end; ++i)
 			{
 			{
-				continue;
-			}
+				auto& cluster = ctx.m_tempClusters[i];
+				cluster.normalizeCounts();
 
 
-			// Check if the previous cluster contains the same lights as this one and if yes then merge them. This will
-			// avoid allocating new IDs (and thrashing GPU caches).
-			cluster.sortLightIds();
-			if(i != start)
-			{
-				const auto& clusterB = ctx.m_tempClusters[i - 1];
+				const U countP = cluster.m_pointCount.get();
+				const U countS = cluster.m_spotCount.get();
+				const U countProbe = cluster.m_probeCount.get();
+				const U countDecal = cluster.m_decalCount.get();
+				const U count = countP + countS + countProbe + countDecal;
+
+				auto& c = ctx.m_clusters[i];
+				c.m_firstIdx = 0; // Point to the first empty indices
 
 
-				if(cluster == clusterB)
+				// Early exit
+				if(ANKI_UNLIKELY(count == 0))
 				{
 				{
-					c.m_firstIdx = ctx.m_clusters[i - 1].m_firstIdx;
 					continue;
 					continue;
 				}
 				}
-			}
 
 
-			U offset = ctx.m_lightIdsCount.fetchAdd(count + SIZE_IDX_COUNT);
-			U initialOffset = offset;
-			(void)initialOffset;
-
-			if(offset + count + SIZE_IDX_COUNT <= ctx.m_maxLightIndices)
-			{
-				c.m_firstIdx = offset;
-
-				ctx.m_lightIds[offset++] = countDecal;
-				for(U i = 0; i < countDecal; ++i)
+				// Check if the previous cluster contains the same lights as this one and if yes then merge them. This
+				// will avoid allocating new IDs (and thrashing GPU caches).
+				cluster.sortLightIds();
+				if(i != start)
 				{
 				{
-					ctx.m_lightIds[offset++] = cluster.m_decalIds[i].getIndex();
-				}
+					const auto& clusterB = ctx.m_tempClusters[i - 1];
 
 
-				ctx.m_lightIds[offset++] = countP;
-				for(U i = 0; i < countP; ++i)
-				{
-					ctx.m_lightIds[offset++] = cluster.m_pointIds[i].getIndex();
+					if(cluster == clusterB)
+					{
+						c.m_firstIdx = ctx.m_clusters[i - 1].m_firstIdx;
+						continue;
+					}
 				}
 				}
 
 
-				ctx.m_lightIds[offset++] = countS;
-				for(U i = 0; i < countS; ++i)
+				U offset = ctx.m_lightIdsCount.fetchAdd(count + SIZE_IDX_COUNT);
+				U initialOffset = offset;
+				(void)initialOffset;
+
+				if(offset + count + SIZE_IDX_COUNT <= ctx.m_maxLightIndices)
 				{
 				{
-					ctx.m_lightIds[offset++] = cluster.m_spotIds[i].getIndex();
+					c.m_firstIdx = offset;
+
+					ctx.m_lightIds[offset++] = countDecal;
+					for(U i = 0; i < countDecal; ++i)
+					{
+						ctx.m_lightIds[offset++] = cluster.m_decalIds[i].getIndex();
+					}
+
+					ctx.m_lightIds[offset++] = countP;
+					for(U i = 0; i < countP; ++i)
+					{
+						ctx.m_lightIds[offset++] = cluster.m_pointIds[i].getIndex();
+					}
+
+					ctx.m_lightIds[offset++] = countS;
+					for(U i = 0; i < countS; ++i)
+					{
+						ctx.m_lightIds[offset++] = cluster.m_spotIds[i].getIndex();
+					}
+
+					ctx.m_lightIds[offset++] = countProbe;
+					for(U i = 0; i < countProbe; ++i)
+					{
+						ctx.m_lightIds[offset++] = cluster.m_probeIds[i].getIndex();
+					}
+
+					ANKI_ASSERT(offset - initialOffset == count + SIZE_IDX_COUNT);
 				}
 				}
-
-				ctx.m_lightIds[offset++] = countProbe;
-				for(U i = 0; i < countProbe; ++i)
+				else
 				{
 				{
-					ctx.m_lightIds[offset++] = cluster.m_probeIds[i].getIndex();
+					ANKI_R_LOGW("Light IDs buffer too small");
 				}
 				}
-
-				ANKI_ASSERT(offset - initialOffset == count + SIZE_IDX_COUNT);
-			}
-			else
-			{
-				ANKI_R_LOGW("Light IDs buffer too small");
-			}
-		} // end for
-	} // end while
-
-	ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
+			} // end for
+		} // end while
+	} // scope
 }
 }
 
 
 void LightBin::writeAndBinPointLight(
 void LightBin::writeAndBinPointLight(
-	const PointLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult)
+	const PointLightQueueElement& lightEl, BinContext& ctx, ClustererTestResult& testResult)
 {
 {
 	// Get GPU light
 	// Get GPU light
 	I idx = ctx.m_pointLightsCount.fetchAdd(1);
 	I idx = ctx.m_pointLightsCount.fetchAdd(1);
@@ -694,7 +700,7 @@ void LightBin::writeAndBinPointLight(
 }
 }
 
 
 void LightBin::writeAndBinSpotLight(
 void LightBin::writeAndBinSpotLight(
-	const SpotLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult)
+	const SpotLightQueueElement& lightEl, BinContext& ctx, ClustererTestResult& testResult)
 {
 {
 	I idx = ctx.m_spotLightsCount.fetchAdd(1);
 	I idx = ctx.m_spotLightsCount.fetchAdd(1);
 
 
@@ -748,7 +754,7 @@ void LightBin::writeAndBinSpotLight(
 }
 }
 
 
 void LightBin::writeAndBinProbe(
 void LightBin::writeAndBinProbe(
-	const ReflectionProbeQueueElement& probeEl, LightBinContext& ctx, ClustererTestResult& testResult)
+	const ReflectionProbeQueueElement& probeEl, BinContext& ctx, ClustererTestResult& testResult)
 {
 {
 	// Write it
 	// Write it
 	ShaderProbe probe;
 	ShaderProbe probe;
@@ -783,7 +789,7 @@ void LightBin::writeAndBinProbe(
 	}
 	}
 }
 }
 
 
-void LightBin::writeAndBinDecal(const DecalQueueElement& decalEl, LightBinContext& ctx, ClustererTestResult& testResult)
+void LightBin::writeAndBinDecal(const DecalQueueElement& decalEl, BinContext& ctx, ClustererTestResult& testResult)
 {
 {
 	I idx = ctx.m_decalCount.fetchAdd(1);
 	I idx = ctx.m_decalCount.fetchAdd(1);
 	ShaderDecal& decal = ctx.m_decals[idx];
 	ShaderDecal& decal = ctx.m_decals[idx];

+ 16 - 11
src/anki/renderer/LightBin.h

@@ -10,9 +10,6 @@
 namespace anki
 namespace anki
 {
 {
 
 
-// Forward
-class LightBinContext;
-
 /// @addtogroup renderer
 /// @addtogroup renderer
 /// @{
 /// @{
 
 
@@ -62,6 +59,17 @@ public:
 	}
 	}
 
 
 private:
 private:
+	class BinContext;
+	class ShaderCluster;
+	class ShaderPointLight;
+	class ShaderSpotLight;
+	class ShaderProbe;
+	class ShaderDecal;
+	class ClusterLightIndex;
+	class ClusterProbeIndex;
+	class ClusterData;
+	class WriteLightsTask;
+
 	GenericMemoryPoolAllocator<U8> m_alloc;
 	GenericMemoryPoolAllocator<U8> m_alloc;
 	Clusterer m_clusterer;
 	Clusterer m_clusterer;
 	U32 m_clusterCount = 0;
 	U32 m_clusterCount = 0;
@@ -69,18 +77,15 @@ private:
 	StagingGpuMemoryManager* m_stagingMem = nullptr;
 	StagingGpuMemoryManager* m_stagingMem = nullptr;
 	Barrier m_barrier;
 	Barrier m_barrier;
 
 
-	void binLights(U32 threadId, PtrSize threadsCount, LightBinContext& ctx);
+	void binLights(U32 threadId, PtrSize threadsCount, BinContext& ctx);
 
 
-	void writeAndBinPointLight(
-		const PointLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult);
+	void writeAndBinPointLight(const PointLightQueueElement& lightEl, BinContext& ctx, ClustererTestResult& testResult);
 
 
-	void writeAndBinSpotLight(
-		const SpotLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult);
+	void writeAndBinSpotLight(const SpotLightQueueElement& lightEl, BinContext& ctx, ClustererTestResult& testResult);
 
 
-	void writeAndBinProbe(
-		const ReflectionProbeQueueElement& probe, LightBinContext& ctx, ClustererTestResult& testResult);
+	void writeAndBinProbe(const ReflectionProbeQueueElement& probe, BinContext& ctx, ClustererTestResult& testResult);
 
 
-	void writeAndBinDecal(const DecalQueueElement& decal, LightBinContext& ctx, ClustererTestResult& testResult);
+	void writeAndBinDecal(const DecalQueueElement& decal, BinContext& ctx, ClustererTestResult& testResult);
 };
 };
 /// @}
 /// @}
 
 

+ 1 - 1
src/anki/renderer/Renderer.cpp

@@ -50,7 +50,7 @@ Error Renderer::init(ThreadPool* threadpool,
 	Timestamp* globTimestamp,
 	Timestamp* globTimestamp,
 	Bool willDrawToDefaultFbo)
 	Bool willDrawToDefaultFbo)
 {
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDERER_INIT);
+	ANKI_TRACE_SCOPED_EVENT(R_INIT);
 
 
 	m_globTimestamp = globTimestamp;
 	m_globTimestamp = globTimestamp;
 	m_threadpool = threadpool;
 	m_threadpool = threadpool;

+ 9 - 0
src/anki/renderer/RendererObject.h

@@ -56,6 +56,15 @@ protected:
 		cmdb->drawArrays(PrimitiveTopology::TRIANGLES, 3, 1);
 		cmdb->drawArrays(PrimitiveTopology::TRIANGLES, 3, 1);
 	}
 	}
 
 
+	/// Dispatch a compute job equivalent to drawQuad
+	static void dispatchPPCompute(
+		CommandBufferPtr& cmdb, U32 workgroupSizeX, U32 workgroupSizeY, U32 outImageWidth, U32 outImageHeight)
+	{
+		const U sizeX = (outImageWidth + workgroupSizeX - 1) / workgroupSizeX;
+		const U sizeY = (outImageHeight + workgroupSizeY - 1) / workgroupSizeY;
+		cmdb->dispatchCompute(sizeX, sizeY, 1);
+	}
+
 	template<typename TPtr>
 	template<typename TPtr>
 	TPtr allocateUniforms(PtrSize size, StagingGpuMemoryToken& token)
 	TPtr allocateUniforms(PtrSize size, StagingGpuMemoryToken& token)
 	{
 	{

+ 4 - 4
src/anki/renderer/ShadowMapping.cpp

@@ -143,7 +143,7 @@ Error ShadowMapping::initInternal(const ConfigSet& cfg)
 void ShadowMapping::runEsm(RenderPassWorkContext& rgraphCtx)
 void ShadowMapping::runEsm(RenderPassWorkContext& rgraphCtx)
 {
 {
 	ANKI_ASSERT(m_esmResolveWorkItems.getSize());
 	ANKI_ASSERT(m_esmResolveWorkItems.getSize());
-	ANKI_TRACE_SCOPED_EVENT(RENDER_SM);
+	ANKI_TRACE_SCOPED_EVENT(R_SM);
 
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
 
@@ -153,7 +153,7 @@ void ShadowMapping::runEsm(RenderPassWorkContext& rgraphCtx)
 
 
 	for(const EsmResolveWorkItem& workItem : m_esmResolveWorkItems)
 	for(const EsmResolveWorkItem& workItem : m_esmResolveWorkItems)
 	{
 	{
-		ANKI_TRACE_INC_COUNTER(RENDERER_SHADOW_PASSES, 1);
+		ANKI_TRACE_INC_COUNTER(R_SHADOW_PASSES, 1);
 
 
 		cmdb->setViewport(
 		cmdb->setViewport(
 			workItem.m_viewportOut[0], workItem.m_viewportOut[1], workItem.m_viewportOut[2], workItem.m_viewportOut[3]);
 			workItem.m_viewportOut[0], workItem.m_viewportOut[1], workItem.m_viewportOut[2], workItem.m_viewportOut[3]);
@@ -174,7 +174,7 @@ void ShadowMapping::runEsm(RenderPassWorkContext& rgraphCtx)
 void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 {
 {
 	ANKI_ASSERT(m_scratchWorkItems.getSize());
 	ANKI_ASSERT(m_scratchWorkItems.getSize());
-	ANKI_TRACE_SCOPED_EVENT(RENDER_SM);
+	ANKI_TRACE_SCOPED_EVENT(R_SM);
 
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	const U threadIdx = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
 	const U threadIdx = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
@@ -202,7 +202,7 @@ void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 
 
 void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 {
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDER_SM);
+	ANKI_TRACE_SCOPED_EVENT(R_SM);
 
 
 	// First process the lights
 	// First process the lights
 	U32 threadCountForScratchPass = 0;
 	U32 threadCountForScratchPass = 0;

+ 1 - 4
src/anki/renderer/Ssao.cpp

@@ -178,10 +178,7 @@ void Ssao::runBlur(RenderPassWorkContext& rgraphCtx)
 	if(m_blurUseCompute)
 	if(m_blurUseCompute)
 	{
 	{
 		rgraphCtx.bindImage(0, 0, m_runCtx.m_rts[1], TextureSubresourceInfo());
 		rgraphCtx.bindImage(0, 0, m_runCtx.m_rts[1], TextureSubresourceInfo());
-
-		const U sizeX = (m_width + m_workgroupSize[0] - 1) / m_workgroupSize[0];
-		const U sizeY = (m_height + m_workgroupSize[1] - 1) / m_workgroupSize[1];
-		cmdb->dispatchCompute(sizeX, sizeY, 1);
+		dispatchPPCompute(cmdb, m_workgroupSize[0], m_workgroupSize[1], m_width, m_height);
 	}
 	}
 	else
 	else
 	{
 	{

+ 2 - 1
src/anki/renderer/TemporalAA.cpp

@@ -56,7 +56,8 @@ Error TemporalAA::initInternal(const ConfigSet& config)
 		m_rtTextures[i] = m_r->createAndClearRenderTarget(m_r->create2DRenderTargetInitInfo(m_r->getWidth(),
 		m_rtTextures[i] = m_r->createAndClearRenderTarget(m_r->create2DRenderTargetInitInfo(m_r->getWidth(),
 			m_r->getHeight(),
 			m_r->getHeight(),
 			LIGHT_SHADING_COLOR_ATTACHMENT_PIXEL_FORMAT,
 			LIGHT_SHADING_COLOR_ATTACHMENT_PIXEL_FORMAT,
-			TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE,
+			TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE
+				| TextureUsageBit::SAMPLED_COMPUTE,
 			"TemporalAA"));
 			"TemporalAA"));
 	}
 	}
 
 

+ 1 - 1
src/anki/resource/AsyncLoader.cpp

@@ -125,7 +125,7 @@ Error AsyncLoader::threadWorker()
 			AsyncLoaderTaskContext ctx;
 			AsyncLoaderTaskContext ctx;
 
 
 			{
 			{
-				ANKI_TRACE_SCOPED_EVENT(RESOURCE_ASYNC_TASK);
+				ANKI_TRACE_SCOPED_EVENT(RSRC_ASYNC_TASK);
 				err = (*task)(ctx);
 				err = (*task)(ctx);
 			}
 			}
 
 

+ 1 - 1
src/anki/resource/MeshLoader.cpp

@@ -310,7 +310,7 @@ Error MeshLoader::storeIndicesAndPosition(DynamicArrayAuto<U32>& indices, Dynami
 		// Copy
 		// Copy
 		for(U i = 0; i < m_header.m_totalVertexCount; ++i)
 		for(U i = 0; i < m_header.m_totalVertexCount; ++i)
 		{
 		{
-			Vec3 vert;
+			Vec3 vert(0.0f);
 			if(attrib.m_format == Format::R32G32B32_SFLOAT)
 			if(attrib.m_format == Format::R32G32B32_SFLOAT)
 			{
 			{
 				vert = *reinterpret_cast<Vec3*>(&staging[i * buffInfo.m_vertexStride + attrib.m_relativeOffset]);
 				vert = *reinterpret_cast<Vec3*>(&staging[i * buffInfo.m_vertexStride + attrib.m_relativeOffset]);

+ 5 - 5
src/anki/resource/ResourceFilesystem.cpp

@@ -25,25 +25,25 @@ public:
 
 
 	ANKI_USE_RESULT Error read(void* buff, PtrSize size) override
 	ANKI_USE_RESULT Error read(void* buff, PtrSize size) override
 	{
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 		return m_file.read(buff, size);
 		return m_file.read(buff, size);
 	}
 	}
 
 
 	ANKI_USE_RESULT Error readAllText(GenericMemoryPoolAllocator<U8> alloc, String& out) override
 	ANKI_USE_RESULT Error readAllText(GenericMemoryPoolAllocator<U8> alloc, String& out) override
 	{
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 		return m_file.readAllText(alloc, out);
 		return m_file.readAllText(alloc, out);
 	}
 	}
 
 
 	ANKI_USE_RESULT Error readU32(U32& u) override
 	ANKI_USE_RESULT Error readU32(U32& u) override
 	{
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 		return m_file.readU32(u);
 		return m_file.readU32(u);
 	}
 	}
 
 
 	ANKI_USE_RESULT Error readF32(F32& f) override
 	ANKI_USE_RESULT Error readF32(F32& f) override
 	{
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 		return m_file.readF32(f);
 		return m_file.readF32(f);
 	}
 	}
 
 
@@ -128,7 +128,7 @@ public:
 
 
 	ANKI_USE_RESULT Error read(void* buff, PtrSize size) override
 	ANKI_USE_RESULT Error read(void* buff, PtrSize size) override
 	{
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 
 
 		I64 readSize = unzReadCurrentFile(m_archive, buff, size);
 		I64 readSize = unzReadCurrentFile(m_archive, buff, size);
 
 

+ 1 - 1
src/anki/resource/TransferGpuAllocator.cpp

@@ -118,7 +118,7 @@ Error TransferGpuAllocator::init(PtrSize maxSize, GrManager* gr, ResourceAllocat
 
 
 Error TransferGpuAllocator::allocate(PtrSize size, TransferGpuAllocatorHandle& handle)
 Error TransferGpuAllocator::allocate(PtrSize size, TransferGpuAllocatorHandle& handle)
 {
 {
-	ANKI_TRACE_SCOPED_EVENT(RESOURCE_ALLOCATE_TRANSFER);
+	ANKI_TRACE_SCOPED_EVENT(RSRC_ALLOCATE_TRANSFER);
 
 
 	const PtrSize frameSize = m_maxAllocSize / FRAME_COUNT;
 	const PtrSize frameSize = m_maxAllocSize / FRAME_COUNT;
 
 

+ 30 - 28
src/anki/scene/SceneGraph.cpp

@@ -200,40 +200,43 @@ Error SceneGraph::update(Second prevUpdateTime, Second crntTime)
 	m_frameAlloc.getMemoryPool().reset();
 	m_frameAlloc.getMemoryPool().reset();
 
 
 	// Delete stuff
 	// Delete stuff
-	ANKI_TRACE_START_EVENT(SCENE_DELETE_STUFF);
-	m_events.deleteEventsMarkedForDeletion();
-	deleteNodesMarkedForDeletion();
-	ANKI_TRACE_STOP_EVENT(SCENE_DELETE_STUFF);
+	{
+		ANKI_TRACE_SCOPED_EVENT(SCENE_MARKED_FOR_DELETION);
+		m_events.deleteEventsMarkedForDeletion();
+		deleteNodesMarkedForDeletion();
+	}
 
 
 	ThreadPool& threadPool = *m_threadpool;
 	ThreadPool& threadPool = *m_threadpool;
 	(void)threadPool;
 	(void)threadPool;
 
 
 	// Update
 	// Update
-	ANKI_TRACE_START_EVENT(SCENE_PHYSICS_UPDATE);
-	m_physics->updateAsync(crntTime - prevUpdateTime);
-	m_physics->waitUpdate();
-	ANKI_TRACE_STOP_EVENT(SCENE_PHYSICS_UPDATE);
-
-	ANKI_TRACE_START_EVENT(SCENE_NODES_UPDATE);
-	ANKI_CHECK(m_events.updateAllEvents(prevUpdateTime, crntTime));
-
-	// Then the rest
-	Array<UpdateSceneNodesTask, ThreadPool::MAX_THREADS> jobs2;
-	UpdateSceneNodesCtx updateCtx;
-	updateCtx.m_scene = this;
-	updateCtx.m_crntNode = m_nodes.getBegin();
-	updateCtx.m_prevUpdateTime = prevUpdateTime;
-	updateCtx.m_crntTime = crntTime;
-
-	for(U i = 0; i < threadPool.getThreadCount(); i++)
 	{
 	{
-		UpdateSceneNodesTask& job = jobs2[i];
-		job.m_ctx = &updateCtx;
-		threadPool.assignNewTask(i, &job);
+		ANKI_TRACE_SCOPED_EVENT(SCENE_PHYSICS_UPDATE);
+		m_physics->updateAsync(crntTime - prevUpdateTime);
+		m_physics->waitUpdate();
 	}
 	}
 
 
-	ANKI_CHECK(threadPool.waitForAllThreadsToFinish());
-	ANKI_TRACE_STOP_EVENT(SCENE_NODES_UPDATE);
+	{
+		ANKI_TRACE_SCOPED_EVENT(SCENE_NODES_UPDATE);
+		ANKI_CHECK(m_events.updateAllEvents(prevUpdateTime, crntTime));
+
+		// Then the rest
+		Array<UpdateSceneNodesTask, ThreadPool::MAX_THREADS> jobs2;
+		UpdateSceneNodesCtx updateCtx;
+		updateCtx.m_scene = this;
+		updateCtx.m_crntNode = m_nodes.getBegin();
+		updateCtx.m_prevUpdateTime = prevUpdateTime;
+		updateCtx.m_crntTime = crntTime;
+
+		for(U i = 0; i < threadPool.getThreadCount(); i++)
+		{
+			UpdateSceneNodesTask& job = jobs2[i];
+			job.m_ctx = &updateCtx;
+			threadPool.assignNewTask(i, &job);
+		}
+
+		ANKI_CHECK(threadPool.waitForAllThreadsToFinish());
+	}
 
 
 	m_stats.m_updateTime = HighRezTimer::getCurrentTime() - m_stats.m_updateTime;
 	m_stats.m_updateTime = HighRezTimer::getCurrentTime() - m_stats.m_updateTime;
 	return Error::NONE;
 	return Error::NONE;
@@ -279,7 +282,7 @@ Error SceneGraph::updateNode(Second prevTime, Second crntTime, SceneNode& node)
 
 
 Error SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx) const
 Error SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx) const
 {
 {
-	ANKI_TRACE_START_EVENT(SCENE_NODES_UPDATE);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_NODES_UPDATE);
 
 
 	IntrusiveList<SceneNode>::Iterator& it = ctx.m_crntNode;
 	IntrusiveList<SceneNode>::Iterator& it = ctx.m_crntNode;
 	IntrusiveList<SceneNode>::ConstIterator end = m_nodes.getEnd();
 	IntrusiveList<SceneNode>::ConstIterator end = m_nodes.getEnd();
@@ -324,7 +327,6 @@ Error SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx) const
 		}
 		}
 	}
 	}
 
 
-	ANKI_TRACE_STOP_EVENT(SCENE_NODES_UPDATE);
 	return err;
 	return err;
 }
 }
 
 

+ 1 - 1
src/anki/scene/SceneGraph.h

@@ -245,7 +245,7 @@ private:
 
 
 	IntrusiveList<SceneNode> m_nodes;
 	IntrusiveList<SceneNode> m_nodes;
 	U32 m_nodesCount = 0;
 	U32 m_nodesCount = 0;
-	HashMap<CString, SceneNode*, CStringHasher> m_nodesDict;
+	HashMap<CString, SceneNode*> m_nodesDict;
 
 
 	SceneNode* m_mainCam = nullptr;
 	SceneNode* m_mainCam = nullptr;
 	Timestamp m_activeCameraChangeTimestamp = 0;
 	Timestamp m_activeCameraChangeTimestamp = 0;

+ 1 - 2
src/anki/scene/SoftwareRasterizer.cpp

@@ -300,9 +300,8 @@ void SoftwareRasterizer::rasterizeTriangle(const Vec4* tri)
 
 
 Bool SoftwareRasterizer::visibilityTest(const CollisionShape& cs, const Aabb& aabb) const
 Bool SoftwareRasterizer::visibilityTest(const CollisionShape& cs, const Aabb& aabb) const
 {
 {
-	ANKI_TRACE_START_EVENT(SCENE_RASTERIZER_TEST);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_RASTERIZER_TEST);
 	Bool inside = visibilityTestInternal(cs, aabb);
 	Bool inside = visibilityTestInternal(cs, aabb);
-	ANKI_TRACE_STOP_EVENT(SCENE_RASTERIZER_TEST);
 
 
 	return inside;
 	return inside;
 }
 }

+ 1 - 2
src/anki/scene/VisibilityInternal.h

@@ -195,11 +195,10 @@ public:
 private:
 private:
 	void gather()
 	void gather()
 	{
 	{
-		ANKI_TRACE_START_EVENT(SCENE_VISIBILITY_ITERATE_SECTORS);
+		ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_ITERATE_SECTORS);
 		U testIdx = m_visCtx->m_testsCount.fetchAdd(1);
 		U testIdx = m_visCtx->m_testsCount.fetchAdd(1);
 
 
 		m_visCtx->m_scene->getSectorGroup().findVisibleNodes(*m_frc, testIdx, m_r, m_sectorsCtx);
 		m_visCtx->m_scene->getSectorGroup().findVisibleNodes(*m_frc, testIdx, m_r, m_sectorsCtx);
-		ANKI_TRACE_STOP_EVENT(SCENE_VISIBILITY_ITERATE_SECTORS);
 	}
 	}
 };
 };
 
 

+ 6 - 0
src/anki/util/Allocator.h

@@ -117,6 +117,12 @@ public:
 		return *this;
 		return *this;
 	}
 	}
 
 
+	/// Check if it's initialized.
+	operator bool() const
+	{
+		return m_pool != nullptr;
+	}
+
 	/// Get the address of a reference
 	/// Get the address of a reference
 	pointer address(reference x) const
 	pointer address(reference x) const
 	{
 	{

+ 1 - 1
src/anki/util/CMakeLists.txt

@@ -1,4 +1,4 @@
-set(SOURCES Assert.cpp Functions.cpp File.cpp Filesystem.cpp Memory.cpp System.cpp HighRezTimer.cpp ThreadPool.cpp ThreadHive.cpp Hash.cpp Logger.cpp String.cpp StringList.cpp)
+set(SOURCES Assert.cpp Functions.cpp File.cpp Filesystem.cpp Memory.cpp System.cpp HighRezTimer.cpp ThreadPool.cpp ThreadHive.cpp Hash.cpp Logger.cpp String.cpp StringList.cpp Tracer.cpp)
 
 
 if(LINUX OR ANDROID OR MACOS)
 if(LINUX OR ANDROID OR MACOS)
 	set(SOURCES ${SOURCES} HighRezTimerPosix.cpp FilesystemPosix.cpp ThreadPosix.cpp)
 	set(SOURCES ${SOURCES} HighRezTimerPosix.cpp FilesystemPosix.cpp ThreadPosix.cpp)

+ 1 - 1
src/anki/util/ObjectAllocator.h

@@ -10,7 +10,7 @@
 namespace anki
 namespace anki
 {
 {
 
 
-/// @addtogroup util
+/// @addtogroup util_containers
 /// @{
 /// @{
 
 
 /// A simple allocator for objects of similar types.
 /// A simple allocator for objects of similar types.

+ 1 - 0
src/anki/util/StdTypes.h

@@ -75,6 +75,7 @@ using Bool8 = I8; ///< Small 8bit boolean type
 using Bool32 = I32; ///< A 32bit boolean
 using Bool32 = I32; ///< A 32bit boolean
 
 
 using Second = F64; ///< The base time unit is second.
 using Second = F64; ///< The base time unit is second.
+const Second MAX_SECOND = MAX_F64;
 
 
 using Timestamp = U64; ///< Timestamp type.
 using Timestamp = U64; ///< Timestamp type.
 const Timestamp MAX_TIMESTAMP = MAX_U64;
 const Timestamp MAX_TIMESTAMP = MAX_U64;

+ 26 - 30
src/anki/util/String.h

@@ -118,12 +118,12 @@ public:
 	}
 	}
 
 
 	/// Return true if the string is not initialized.
 	/// Return true if the string is not initialized.
-	Bool isEmpty() const
+	bool isEmpty() const
 	{
 	{
 		return m_ptr == nullptr || getLength() == 0;
 		return m_ptr == nullptr || getLength() == 0;
 	}
 	}
 
 
-	Bool operator==(const CString& b) const
+	bool operator==(const CString& b) const
 	{
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
 		{
@@ -135,12 +135,12 @@ public:
 		}
 		}
 	}
 	}
 
 
-	Bool operator!=(const CString& b) const
+	bool operator!=(const CString& b) const
 	{
 	{
 		return !((*this) == b);
 		return !((*this) == b);
 	}
 	}
 
 
-	Bool operator<(const CString& b) const
+	bool operator<(const CString& b) const
 	{
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
 		{
@@ -152,7 +152,7 @@ public:
 		}
 		}
 	}
 	}
 
 
-	Bool operator<=(const CString& b) const
+	bool operator<=(const CString& b) const
 	{
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
 		{
@@ -164,7 +164,7 @@ public:
 		}
 		}
 	}
 	}
 
 
-	Bool operator>(const CString& b) const
+	bool operator>(const CString& b) const
 	{
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
 		{
@@ -176,7 +176,7 @@ public:
 		}
 		}
 	}
 	}
 
 
-	Bool operator>=(const CString& b) const
+	bool operator>=(const CString& b) const
 	{
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
 		{
@@ -234,6 +234,13 @@ public:
 	/// Convert to U32.
 	/// Convert to U32.
 	ANKI_USE_RESULT Error toNumber(U32& out) const;
 	ANKI_USE_RESULT Error toNumber(U32& out) const;
 
 
+	/// Compute the hash.
+	U32 computeHash() const
+	{
+		checkInit();
+		return anki::computeHash(m_ptr, getLength());
+	}
+
 private:
 private:
 	const Char* m_ptr = nullptr;
 	const Char* m_ptr = nullptr;
 
 
@@ -243,17 +250,6 @@ private:
 	}
 	}
 };
 };
 
 
-/// Hasher function for CStrings. Can be used in HashMap.
-class CStringHasher
-{
-public:
-	U64 operator()(CString str)
-	{
-		ANKI_ASSERT(!str.isEmpty());
-		return computeHash(&str[0], str.getLength());
-	}
-};
-
 /// Compare function for CStrings. Can be used in HashMap.
 /// Compare function for CStrings. Can be used in HashMap.
 class CStringCompare
 class CStringCompare
 {
 {
@@ -379,7 +375,7 @@ public:
 	}
 	}
 
 
 	/// Return true if strings are equal
 	/// Return true if strings are equal
-	Bool operator==(const String& b) const
+	bool operator==(const String& b) const
 	{
 	{
 		checkInit();
 		checkInit();
 		b.checkInit();
 		b.checkInit();
@@ -387,13 +383,13 @@ public:
 	}
 	}
 
 
 	/// Return true if strings are not equal
 	/// Return true if strings are not equal
-	Bool operator!=(const String& b) const
+	bool operator!=(const String& b) const
 	{
 	{
 		return !(*this == b);
 		return !(*this == b);
 	}
 	}
 
 
 	/// Return true if this is less than b
 	/// Return true if this is less than b
-	Bool operator<(const String& b) const
+	bool operator<(const String& b) const
 	{
 	{
 		checkInit();
 		checkInit();
 		b.checkInit();
 		b.checkInit();
@@ -401,7 +397,7 @@ public:
 	}
 	}
 
 
 	/// Return true if this is less or equal to b
 	/// Return true if this is less or equal to b
-	Bool operator<=(const String& b) const
+	bool operator<=(const String& b) const
 	{
 	{
 		checkInit();
 		checkInit();
 		b.checkInit();
 		b.checkInit();
@@ -409,7 +405,7 @@ public:
 	}
 	}
 
 
 	/// Return true if this is greater than b
 	/// Return true if this is greater than b
-	Bool operator>(const String& b) const
+	bool operator>(const String& b) const
 	{
 	{
 		checkInit();
 		checkInit();
 		b.checkInit();
 		b.checkInit();
@@ -417,7 +413,7 @@ public:
 	}
 	}
 
 
 	/// Return true if this is greater or equal to b
 	/// Return true if this is greater or equal to b
-	Bool operator>=(const String& b) const
+	bool operator>=(const String& b) const
 	{
 	{
 		checkInit();
 		checkInit();
 		b.checkInit();
 		b.checkInit();
@@ -425,41 +421,41 @@ public:
 	}
 	}
 
 
 	/// Return true if strings are equal
 	/// Return true if strings are equal
-	Bool operator==(const CStringType& cstr) const
+	bool operator==(const CStringType& cstr) const
 	{
 	{
 		checkInit();
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) == 0;
 		return std::strcmp(&m_data[0], cstr.get()) == 0;
 	}
 	}
 
 
 	/// Return true if strings are not equal
 	/// Return true if strings are not equal
-	Bool operator!=(const CStringType& cstr) const
+	bool operator!=(const CStringType& cstr) const
 	{
 	{
 		return !(*this == cstr);
 		return !(*this == cstr);
 	}
 	}
 
 
 	/// Return true if this is less than cstr.
 	/// Return true if this is less than cstr.
-	Bool operator<(const CStringType& cstr) const
+	bool operator<(const CStringType& cstr) const
 	{
 	{
 		checkInit();
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) < 0;
 		return std::strcmp(&m_data[0], cstr.get()) < 0;
 	}
 	}
 
 
 	/// Return true if this is less or equal to cstr.
 	/// Return true if this is less or equal to cstr.
-	Bool operator<=(const CStringType& cstr) const
+	bool operator<=(const CStringType& cstr) const
 	{
 	{
 		checkInit();
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) <= 0;
 		return std::strcmp(&m_data[0], cstr.get()) <= 0;
 	}
 	}
 
 
 	/// Return true if this is greater than cstr.
 	/// Return true if this is greater than cstr.
-	Bool operator>(const CStringType& cstr) const
+	bool operator>(const CStringType& cstr) const
 	{
 	{
 		checkInit();
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) > 0;
 		return std::strcmp(&m_data[0], cstr.get()) > 0;
 	}
 	}
 
 
 	/// Return true if this is greater or equal to cstr.
 	/// Return true if this is greater or equal to cstr.
-	Bool operator>=(const CStringType& cstr) const
+	bool operator>=(const CStringType& cstr) const
 	{
 	{
 		checkInit();
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) >= 0;
 		return std::strcmp(&m_data[0], cstr.get()) >= 0;

+ 571 - 0
src/anki/util/Tracer.cpp

@@ -0,0 +1,571 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <anki/util/Tracer.h>
+#include <anki/util/HighRezTimer.h>
+#include <anki/util/HashMap.h>
+
+namespace anki
+{
+
+/// Lightweight event storage. beginEvent() fills m_timestamp, endEvent() fills m_name and m_duration.
+class Tracer::Event
+{
+public:
+	const char* m_name; ///< Assigned in endEvent(). Only the pointer is stored, the characters are not copied.
+	Second m_timestamp; ///< Start time, HighRezTimer::getCurrentTime() sampled in beginEvent().
+	Second m_duration; ///< Time of endEvent() minus m_timestamp.
+};
+
+/// Event batch allocation. beginEvent() links a new chunk in the calling thread's list when the last one is full.
+class Tracer::EventsChunk : public IntrusiveListEnabled<EventsChunk>
+{
+public:
+	Array<Event, EVENTS_PER_CHUNK> m_events;
+	U32 m_eventCount = 0; ///< Number of m_events slots already handed out by beginEvent().
+};
+
+/// A heavyweight event with more info. gatherEvents() builds one from every Event and tags it with a thread ID.
+class Tracer::GatherEvent
+{
+public:
+	CString m_name;
+	Second m_timestamp;
+	Second m_duration;
+	ThreadId m_tid; ///< Copied from ThreadLocal::m_tid of the thread that recorded the event.
+};
+
+/// Lightweight counter storage. One instance is written by every increaseCounter() call.
+class Tracer::Counter
+{
+public:
+	const char* m_name; ///< Only the pointer is stored, the characters are not copied.
+	U64 m_value; ///< The value passed to increaseCounter().
+};
+
+/// Counter batch allocation. All counters of a chunk belong to the same frame (see increaseCounter()).
+class Tracer::CountersChunk : public IntrusiveListEnabled<CountersChunk>
+{
+public:
+	U64 m_frame; ///< Copy of Tracer::m_frame taken when the chunk was created.
+	Second m_startFrameTime; ///< Copy of Tracer::m_startFrameTime taken when the chunk was created.
+	Array<Counter, COUNTERS_PER_CHUNK> m_counters;
+	U32 m_counterCount = 0; ///< Number of m_counters slots already in use.
+};
+
+/// Heavyweight counter storage. Used by gatherCounters() while merging the counters of all threads.
+class Tracer::GatherCounter
+{
+public:
+	CString m_name;
+	U64 m_value; ///< After merging it holds the sum of all same-named counters of a frame.
+};
+
+/// Thread local storage. Created lazily by getThreadLocal() and destroyed in ~Tracer().
+class Tracer::ThreadLocal
+{
+public:
+	ThreadId m_tid ANKI_DBG_NULLIFY; ///< Set in getThreadLocal() from Thread::getCurrentThreadId().
+
+	IntrusiveList<CountersChunk> m_counterChunks; ///< Appended by increaseCounter(), drained by gatherCounters().
+	IntrusiveList<EventsChunk> m_eventChunks; ///< Appended by beginEvent(), drained by gatherEvents().
+};
+
+thread_local Tracer::ThreadLocal* Tracer::m_threadLocal = nullptr;
+
+/// Storage of counters per frame. Populated by gatherCounters().
+class Tracer::PerFrameCounters
+{
+public:
+	DynamicArrayAuto<GatherCounter> m_counters; ///< The merged counters of the frame.
+	DynamicArrayAuto<GatherCounter> m_tempCounters; ///< A temp storage. Holds the unmerged counters and gets destroyed after merging.
+	U64 m_frame; ///< Taken from the first CountersChunk found for this frame.
+	Second m_startFrameTime; ///< Taken from the first CountersChunk found for this frame.
+
+	PerFrameCounters(GenericMemoryPoolAllocator<U8> alloc)
+		: m_counters(alloc)
+		, m_tempCounters(alloc)
+	{
+	}
+};
+
+/// Context for Tracer::flush(). Carries the gathered data from the gather*() methods to the write*() methods.
+class Tracer::FlushCtx
+{
+public:
+	GenericMemoryPoolAllocator<U8> m_alloc;
+	CString m_filename; ///< Base name. writeTraceJson() and writeCounterCsv() append their own suffix.
+	DynamicArrayAuto<CString> m_counterNames; ///< Unique counter names across all frames, sorted by gatherCounters().
+	DynamicArrayAuto<PerFrameCounters> m_counters; ///< One element per frame that recorded counters.
+	DynamicArrayAuto<GatherEvent> m_events; ///< The events of all threads, sorted by gatherEvents().
+
+	FlushCtx(GenericMemoryPoolAllocator<U8> alloc, const CString& filename)
+		: m_alloc(alloc)
+		, m_filename(filename)
+		, m_counterNames(alloc)
+		, m_counters(alloc)
+		, m_events(alloc)
+	{
+	}
+};
+
+/// Free whatever chunks were not consumed by flush() and then the per-thread structures themselves.
+Tracer::~Tracer()
+{
+	for(ThreadLocal* tls : m_allThreadLocal)
+	{
+		// Drain the counter chunks that are still linked
+		IntrusiveList<CountersChunk>& counterChunks = tls->m_counterChunks;
+		while(!counterChunks.isEmpty())
+		{
+			CountersChunk* chunk = &counterChunks.getFront();
+			counterChunks.popFront();
+			m_alloc.deleteInstance(chunk);
+		}
+
+		// Drain the event chunks that are still linked
+		IntrusiveList<EventsChunk>& eventChunks = tls->m_eventChunks;
+		while(!eventChunks.isEmpty())
+		{
+			EventsChunk* chunk = &eventChunks.getFront();
+			eventChunks.popFront();
+			m_alloc.deleteInstance(chunk);
+		}
+
+		// The lists are empty, the structure itself can go
+		m_alloc.deleteInstance(tls);
+	}
+
+	m_allThreadLocal.destroy(m_alloc);
+}
+
+/// Begin a new frame. Stores the frame number and samples the frame's start time.
+/// @param frame The new frame number. Should be zero or greater than the previous frame number.
+void Tracer::newFrame(U64 frame)
+{
+	LockGuard<SpinLock> lock(m_frameMtx);
+
+	// m_frame is protected by m_frameMtx so validate against it only while holding the lock
+	ANKI_ASSERT(frame == 0 || frame > m_frame);
+
+	m_startFrameTime = HighRezTimer::getCurrentTime();
+	m_frame = frame;
+}
+
+/// Get the ThreadLocal of the calling thread. The first call from a thread allocates it and registers it in
+/// m_allThreadLocal.
+Tracer::ThreadLocal& Tracer::getThreadLocal()
+{
+	if(ANKI_UNLIKELY(m_threadLocal == nullptr))
+	{
+		// First use from this thread, create the storage
+		ThreadLocal* created = m_alloc.newInstance<ThreadLocal>();
+		created->m_tid = Thread::getCurrentThreadId();
+		m_threadLocal = created;
+
+		// Register it. m_allThreadLocal is shared between threads, hence the lock
+		LockGuard<Mutex> lock(m_threadLocalMtx);
+		m_allThreadLocal.emplaceBack(m_alloc, created);
+	}
+
+	return *m_threadLocal;
+}
+
+/// Reserve an event slot in the calling thread's storage and stamp it with the current time.
+/// @return An opaque handle that needs to be passed to endEvent().
+TracerEventHandle Tracer::beginEvent()
+{
+	IntrusiveList<EventsChunk>& chunks = getThreadLocal().m_eventChunks;
+
+	// A new chunk is needed if there is none or if the most recent one has no free slots
+	const Bool needsChunk = chunks.isEmpty() || chunks.getBack().m_eventCount >= EVENTS_PER_CHUNK;
+	if(needsChunk)
+	{
+		chunks.pushBack(m_alloc.newInstance<EventsChunk>());
+	}
+
+	// Grab the next free slot
+	EventsChunk& current = chunks.getBack();
+	Event& slot = current.m_events[current.m_eventCount];
+	++current.m_eventCount;
+
+	slot.m_timestamp = HighRezTimer::getCurrentTime();
+	return &slot;
+}
+
+/// Finish an event that was started with beginEvent(). The duration of the event is also recorded as a counter with
+/// the same name.
+/// @param eventName The name of the event. Only the pointer is stored.
+/// @param eventHandle The handle beginEvent() returned.
+void Tracer::endEvent(const char* eventName, TracerEventHandle eventHandle)
+{
+	ANKI_ASSERT(eventName);
+	ANKI_ASSERT(eventHandle);
+
+	const Second now = HighRezTimer::getCurrentTime();
+
+	Event& ev = *static_cast<Event*>(eventHandle);
+	ev.m_name = eventName;
+	ev.m_duration = now - ev.m_timestamp;
+
+	// The counter holds the duration in nanoseconds
+	const U64 durationNs = U64(ev.m_duration * 1000000000.0);
+	increaseCounter(eventName, durationNs);
+}
+
+void Tracer::increaseCounter(const char* counterName, U64 value)
+{
+	ANKI_ASSERT(counterName);
+
+	ThreadLocal& threadLocal = getThreadLocal();
+
+	// Create chunk if there is none, if the last one is of another frame or if it's full. NOTE(review): m_frame is read here without m_frameMtx
+	if(threadLocal.m_counterChunks.isEmpty() || threadLocal.m_counterChunks.getBack().m_frame != m_frame
+		|| threadLocal.m_counterChunks.getBack().m_counterCount >= COUNTERS_PER_CHUNK)
+	{
+		CountersChunk* newChunk = m_alloc.newInstance<CountersChunk>();
+		threadLocal.m_counterChunks.pushBack(newChunk);
+
+		{
+			LockGuard<SpinLock> lock(m_frameMtx); // Frame number and start time are copied as a consistent pair
+			newChunk->m_frame = m_frame;
+			newChunk->m_startFrameTime = m_startFrameTime;
+		}
+	}
+
+	CountersChunk& chunk = threadLocal.m_counterChunks.getBack();
+
+	Counter& counter = chunk.m_counters[chunk.m_counterCount++]; // Every call appends an entry, merging happens in gatherCounters()
+	counter.m_name = counterName;
+	counter.m_value = value;
+}
+
+void Tracer::gatherCounters(FlushCtx& ctx)
+{
+	// Step 1: Drain the counter chunks of all threads into the temp storage of the PerFrameCounters they belong to
+	for(ThreadLocal* threadLocal : m_allThreadLocal)
+	{
+		while(!threadLocal->m_counterChunks.isEmpty())
+		{
+			// Pop chunk
+			CountersChunk& chunk = threadLocal->m_counterChunks.getFront();
+			threadLocal->m_counterChunks.popFront();
+
+			// Iterate the PerFrameCounters to find if the frame is present (linear search on the frame number)
+			PerFrameCounters* perFrame = nullptr;
+			for(PerFrameCounters& pf : ctx.m_counters)
+			{
+				if(pf.m_frame == chunk.m_frame)
+				{
+					perFrame = &pf;
+					break;
+				}
+			}
+
+			if(!perFrame)
+			{
+				ctx.m_counters.emplaceBack(m_alloc); // First chunk of that frame, create the frame's entry
+
+				perFrame = &ctx.m_counters.getBack();
+				perFrame->m_frame = chunk.m_frame;
+				perFrame->m_startFrameTime = chunk.m_startFrameTime;
+			}
+
+			ANKI_ASSERT(chunk.m_frame == perFrame->m_frame);
+
+			// Copy the counters to the temp storage as they are. Same-named counters get merged in step 2
+			for(U i = 0; i < chunk.m_counterCount; ++i)
+			{
+				const Counter& inCounter = chunk.m_counters[i];
+
+				GatherCounter outCounter;
+				outCounter.m_name = inCounter.m_name;
+				outCounter.m_value = inCounter.m_value;
+
+				perFrame->m_tempCounters.emplaceBack(outCounter);
+			}
+
+			// Delete chunk
+			m_alloc.deleteInstance(&chunk);
+		}
+	}
+
+	if(ctx.m_counters.getSize() == 0)
+	{
+		// Early exit, no counters were recorded
+		return;
+	}
+
+	// Step 2: Compact the counters and get all counter names
+	for(PerFrameCounters& perFrame : ctx.m_counters)
+	{
+		if(perFrame.m_tempCounters.getSize() == 0)
+		{
+			continue;
+		}
+
+		// Sort counters by name so that same-named counters end up next to each other
+		std::sort(perFrame.m_tempCounters.getBegin(),
+			perFrame.m_tempCounters.getEnd(),
+			[](const GatherCounter& a, const GatherCounter& b) { return a.m_name < b.m_name; });
+
+		// Compact counters by summing up the values of neighbouring same-named counters
+		for(const GatherCounter& tmpCounter : perFrame.m_tempCounters)
+		{
+			if(perFrame.m_counters.getSize() == 0 || perFrame.m_counters.getBack().m_name != tmpCounter.m_name)
+			{
+				// Create new counter, it's the first time this name appears in this frame
+				perFrame.m_counters.emplaceBack(tmpCounter);
+
+				// Update the counter names, the name is added only if no other frame has added it already
+				Bool found = false;
+				for(const CString& counterName : ctx.m_counterNames)
+				{
+					if(counterName == tmpCounter.m_name)
+					{
+						found = true;
+						break;
+					}
+				}
+
+				if(!found)
+				{
+					ctx.m_counterNames.emplaceBack(tmpCounter.m_name);
+				}
+			}
+			else
+			{
+				// Merge counters, same name as the last compacted counter
+				GatherCounter& mergeTo = perFrame.m_counters.getBack();
+				ANKI_ASSERT(mergeTo.m_name == tmpCounter.m_name);
+				mergeTo.m_value += tmpCounter.m_value;
+			}
+		}
+
+		// Free some memory, the temp storage is not needed after the compaction
+		perFrame.m_tempCounters.destroy();
+	}
+
+	// Step 3: Sort the counter names
+	ANKI_ASSERT(ctx.m_counterNames.getSize() > 0);
+	std::sort(ctx.m_counterNames.getBegin(), ctx.m_counterNames.getEnd(), [](CString a, CString b) { return a < b; });
+
+	// Step 4: Fill the gaps. Some counters might have not appeared in some frames. Those counters need to have a zero
+	// value because the CSV wants all counters present on all rows
+	for(PerFrameCounters& perFrame : ctx.m_counters)
+	{
+		ANKI_ASSERT(perFrame.m_counters.getSize() <= ctx.m_counterNames.getSize());
+
+		for(U i = 0; i < ctx.m_counterNames.getSize(); ++i)
+		{
+			const CString& counterName = ctx.m_counterNames[i];
+
+			// Try to find the counter in this frame (linear search)
+			Bool found = false;
+			for(const GatherCounter& c : perFrame.m_counters)
+			{
+				if(counterName == c.m_name)
+				{
+					found = true;
+					break;
+				}
+			}
+
+			if(!found)
+			{
+				// Counter is missing, append a zero-valued one
+				GatherCounter missingCounter;
+				missingCounter.m_name = counterName;
+				missingCounter.m_value = 0;
+				perFrame.m_counters.emplaceBack(missingCounter);
+			}
+		}
+
+		// Sort again, the appended counters broke the name order. After this the order matches ctx.m_counterNames
+		std::sort(perFrame.m_counters.getBegin(),
+			perFrame.m_counters.getEnd(),
+			[](const GatherCounter& a, const GatherCounter& b) { return a.m_name < b.m_name; });
+
+		ANKI_ASSERT(perFrame.m_counters.getSize() == ctx.m_counterNames.getSize());
+	}
+}
+
+/// Move the events of all threads into ctx.m_events and sort them. The consumed chunks are freed.
+void Tracer::gatherEvents(FlushCtx& ctx)
+{
+	for(ThreadLocal* tls : m_allThreadLocal)
+	{
+		IntrusiveList<EventsChunk>& chunks = tls->m_eventChunks;
+
+		while(!chunks.isEmpty())
+		{
+			// Unlink the oldest chunk
+			EventsChunk* chunk = &chunks.getFront();
+			chunks.popFront();
+
+			// Convert its events and tag them with the ID of the thread that recorded them
+			for(U idx = 0; idx < chunk->m_eventCount; ++idx)
+			{
+				const Event& src = chunk->m_events[idx];
+
+				GatherEvent dst;
+				dst.m_name = src.m_name;
+				dst.m_timestamp = src.m_timestamp;
+				dst.m_duration = src.m_duration;
+				dst.m_tid = tls->m_tid;
+
+				ctx.m_events.emplaceBack(dst);
+			}
+
+			// Delete the popped chunk
+			m_alloc.deleteInstance(chunk);
+		}
+	}
+
+	// Order by start time first, then by duration and finally by name
+	const auto eventLess = [](const GatherEvent& a, const GatherEvent& b) {
+		if(a.m_timestamp != b.m_timestamp)
+		{
+			return a.m_timestamp < b.m_timestamp;
+		}
+
+		if(a.m_duration != b.m_duration)
+		{
+			return a.m_duration < b.m_duration;
+		}
+
+		return a.m_name < b.m_name;
+	};
+
+	std::sort(ctx.m_events.getBegin(), ctx.m_events.getEnd(), eventLess);
+}
+
+/// Write the gathered events and counters to "<filename>.trace.json". Events are written as "ph": "X" entries and
+/// counters as "ph": "C" entries. All times are written in microseconds.
+/// @param ctx The context that gatherCounters() and gatherEvents() have populated.
+/// @return An error if the file can't be opened or written.
+Error Tracer::writeTraceJson(const FlushCtx& ctx)
+{
+	// Open the file
+	StringAuto newFname(m_alloc);
+	newFname.sprintf("%s.trace.json", ctx.m_filename.cstr());
+	File file;
+	ANKI_CHECK(file.open(newFname.toCString(), FileOpenFlag::WRITE));
+
+	// Counters can exist without events (increaseCounter() can be called on its own) so exit early only if both are
+	// missing. Checking just the events would silently drop the counters
+	if(ctx.m_events.getSize() == 0 && ctx.m_counters.getSize() == 0)
+	{
+		// Early exit, the file stays empty
+		return Error::NONE;
+	}
+
+	ANKI_CHECK(file.writeText("[\n"));
+
+	// Write the events to the file
+	for(const GatherEvent& event : ctx.m_events)
+	{
+		const U64 startMicroSec = U64(event.m_timestamp * 1000000.0);
+		const U64 durMicroSec = U64(event.m_duration * 1000000.0);
+
+		if(durMicroSec == 0)
+		{
+			// Skip the events that are shorter than a microsecond
+			continue;
+		}
+
+		ANKI_CHECK(file.writeText("{\"name\": \"%s\", \"cat\": \"PERF\", \"ph\": \"X\", "
+								  "\"pid\": 1, \"tid\": %llu, \"ts\": %llu, \"dur\": %llu},\n",
+			event.m_name.cstr(),
+			event.m_tid,
+			startMicroSec,
+			durMicroSec));
+	}
+
+	// Write the counters
+	for(U i = 0; i < ctx.m_counters.getSize(); ++i)
+	{
+		const PerFrameCounters& frame = ctx.m_counters[i];
+		const Second startFrameTime = frame.m_startFrameTime;
+
+		// The counters need a range in order to appear. Add a dummy counter for the last frame, one second after
+		// the frame's start
+		const Array<Second, 2> timestamps = {{startFrameTime, startFrameTime + 1.0}};
+		const U timestampCount = (i < ctx.m_counters.getSize() - 1) ? 1 : 2;
+
+		for(const GatherCounter& counter : frame.m_counters)
+		{
+			for(U j = 0; j < timestampCount; ++j)
+			{
+				ANKI_CHECK(file.writeText("{\"name\": \"%s\", \"cat\": \"PERF\", \"ph\": \"C\", "
+										  "\"pid\": 1, \"ts\": %llu, \"args\": {\"val\": %llu}},\n",
+					counter.m_name.cstr(),
+					U64(timestamps[j] * 1000000.0),
+					counter.m_value));
+			}
+		}
+	}
+
+	// Every entry above ends with a comma. The empty object is there to make the last comma legal
+	ANKI_CHECK(file.writeText("{}\n]\n"));
+
+	return Error::NONE;
+}
+
+/// Write the gathered counters to "<filename>.counters.csv". The layout is a header row with the counter names, one
+/// row per frame and two rows with spreadsheet formulas (SUM and AVERAGE) at the end.
+/// @param ctx The context that gatherCounters() has populated.
+/// @return An error if the file can't be opened or written.
+Error Tracer::writeCounterCsv(const FlushCtx& ctx)
+{
+	// Open the file
+	StringAuto fname(m_alloc);
+	fname.sprintf("%s.counters.csv", ctx.m_filename.cstr());
+	File file;
+	ANKI_CHECK(file.open(fname.toCString(), FileOpenFlag::WRITE));
+
+	if(ctx.m_counters.getSize() == 0)
+	{
+		// If there are no counters leave the file empty and exit
+		return Error::NONE;
+	}
+
+	// Write the counter names
+	ANKI_CHECK(file.writeText("Frame"));
+	for(CString counterName : ctx.m_counterNames)
+	{
+		ANKI_CHECK(file.writeText(",%s", counterName.cstr()));
+	}
+	ANKI_CHECK(file.writeText("\n"));
+
+	// Dump the frames
+	U rowCount = 0;
+	for(const PerFrameCounters& frame : ctx.m_counters)
+	{
+		ANKI_CHECK(file.writeText("%llu", frame.m_frame));
+
+		for(const GatherCounter& c : frame.m_counters)
+		{
+			ANKI_CHECK(file.writeText(",%llu", c.m_value));
+		}
+
+		ANKI_CHECK(file.writeText("\n"));
+		++rowCount;
+	}
+
+	// Row 1 is the header so the frames occupy the rows [2, rowCount + 1]. The explicit U32 makes the argument match
+	// the %u specifier no matter how wide U is
+	const U32 lastRow = U32(rowCount + 1);
+
+	// Dump some spreadsheet functions
+	ANKI_CHECK(file.writeText("SUM"));
+	for(U i = 0; i < ctx.m_counterNames.getSize(); ++i)
+	{
+		// Column 0 is the frame number, the counters start from column 1
+		Array<char, 3> columnName;
+		getSpreadsheetColumnName(i + 1, columnName);
+		ANKI_CHECK(file.writeText(",=SUM(%s2:%s%u)", &columnName[0], &columnName[0], lastRow));
+	}
+	ANKI_CHECK(file.writeText("\n"));
+
+	ANKI_CHECK(file.writeText("AVG"));
+	for(U i = 0; i < ctx.m_counterNames.getSize(); ++i)
+	{
+		Array<char, 3> columnName;
+		getSpreadsheetColumnName(i + 1, columnName);
+		ANKI_CHECK(file.writeText(",=AVERAGE(%s2:%s%u)", &columnName[0], &columnName[0], lastRow));
+	}
+	// Terminate the last row like all the other rows
+	ANKI_CHECK(file.writeText("\n"));
+
+	return Error::NONE;
+}
+
+/// Gather everything that all the threads have recorded and write it to "<filename>.trace.json" and
+/// "<filename>.counters.csv".
+/// @param filename The base name of the output files.
+/// @return The first error that one of the writers reported.
+Error Tracer::flush(CString filename)
+{
+	FlushCtx flushCtx(m_alloc, filename);
+
+	// Gather. This consumes the per-thread chunks
+	gatherCounters(flushCtx);
+	gatherEvents(flushCtx);
+
+	// Write
+	ANKI_CHECK(writeTraceJson(flushCtx));
+	ANKI_CHECK(writeCounterCsv(flushCtx));
+
+	return Error::NONE;
+}
+
+/// Convert a zero-based column index to a spreadsheet column name: 0 is "A", 25 is "Z", 26 is "AA", 701 is "ZZ".
+/// @param column The zero-based column index. Needs to be less than 702 because the name can have up to 2 letters.
+/// @param[out] arr The null terminated column name.
+void Tracer::getSpreadsheetColumnName(U column, Array<char, 3>& arr)
+{
+	// Anything past "ZZ" needs a third letter that doesn't fit in arr and would produce non-letter characters
+	ANKI_ASSERT(column < 26 * 27);
+
+	U major = column / 26;
+	U minor = column % 26;
+
+	if(major)
+	{
+		// Two letters. The first letter is one-based ("AA" comes right after "Z")
+		arr[0] = 'A' + (major - 1);
+		arr[1] = 'A' + minor;
+	}
+	else
+	{
+		// Single letter
+		arr[0] = 'A' + minor;
+		arr[1] = '\0';
+	}
+
+	arr[2] = '\0';
+}
+
+} // end namespace anki

+ 127 - 0
src/anki/util/Tracer.h

@@ -0,0 +1,127 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <anki/util/File.h>
+#include <anki/util/List.h>
+#include <anki/util/ObjectAllocator.h>
+#include <anki/util/Singleton.h>
+
+namespace anki
+{
+
+/// @addtogroup util_other
+/// @{
+
+/// Opaque handle to an event that is in progress. It is returned by Tracer::beginEvent() and it should be given
+/// back to Tracer::endEvent().
+/// @memberof Tracer
+using TracerEventHandle = void*;
+
+/// Tracer. Records timed events and counters and dumps them to files with flush(). It keeps track of one
+/// ThreadLocal structure per thread and gathers the data of all of them when flushing.
+class Tracer : public NonCopyable
+{
+public:
+	Tracer()
+	{
+	}
+
+	~Tracer();
+
+	/// Store the allocator. flush() hands it to its FlushCtx.
+	/// @param alloc The allocator to keep in m_alloc.
+	void init(GenericMemoryPoolAllocator<U8> alloc)
+	{
+		m_alloc = alloc;
+	}
+
+	/// @return True if m_alloc evaluates to true. Presumably that is the case once init() has been called with a
+	///         valid allocator (TODO confirm against GenericMemoryPoolAllocator).
+	Bool isInitialized() const
+	{
+		return !!m_alloc;
+	}
+
+	/// Begin a new event.
+	/// @return The handle to pass to endEvent().
+	ANKI_USE_RESULT TracerEventHandle beginEvent();
+
+	/// End the event that got started with beginEvent().
+	/// @param eventName The name to give to the event.
+	/// @param event The handle that beginEvent() returned.
+	void endEvent(const char* eventName, TracerEventHandle event);
+
+	/// Increase a counter.
+	/// @param counterName The name of the counter.
+	/// @param value The amount to add.
+	void increaseCounter(const char* counterName, U64 value);
+
+	/// Begin a new frame.
+	/// @param frame The index of the new frame.
+	void newFrame(U64 frame);
+
+	/// Flush all results to a file. Don't call that more than once.
+	/// @param filename The name that the output files are derived from.
+	ANKI_USE_RESULT Error flush(CString filename);
+
+private:
+	// Presumably the capacity of an EventsChunk and of a CountersChunk. The chunk definitions are not part of this
+	// header (TODO confirm)
+	static const U32 EVENTS_PER_CHUNK = 256;
+	static const U32 COUNTERS_PER_CHUNK = 512;
+
+	// The helper types are only forward declared, they are defined outside of this header
+	class Event;
+	class EventsChunk;
+	class GatherEvent;
+
+	class Counter;
+	class CountersChunk;
+	class GatherCounter;
+
+	class ThreadLocal;
+	class PerFrameCounters;
+	class FlushCtx;
+
+	GenericMemoryPoolAllocator<U8> m_alloc; ///< Set by init().
+
+	Second m_startFrameTime = 0.0;
+	U64 m_frame = 0;
+	SpinLock m_frameMtx; ///< Protect m_startFrameTime and m_frame.
+
+	static thread_local ThreadLocal* m_threadLocal; ///< One pointer per thread, shared by all Tracer instances.
+	DynamicArray<ThreadLocal*> m_allThreadLocal; ///< The Tracer should know about all the ThreadLocal.
+	Mutex m_threadLocalMtx; ///< Presumably protects m_allThreadLocal (TODO confirm in Tracer.cpp).
+
+	/// Get the thread local ThreadLocal structure.
+	ThreadLocal& getThreadLocal();
+
+	/// Gather all counters from all the threads.
+	void gatherCounters(FlushCtx& ctx);
+
+	/// Gather the events from all the threads.
+	void gatherEvents(FlushCtx& ctx);
+
+	/// Dump the counters to a CSV file
+	Error writeCounterCsv(const FlushCtx& ctx);
+
+	/// Dump the events and the counters to a chrome trace file.
+	Error writeTraceJson(const FlushCtx& ctx);
+
+	/// Write the spreadsheet name of a zero based column index (0 is "A", 26 is "AA") to arr, null terminated.
+	static void getSpreadsheetColumnName(U column, Array<char, 3>& arr);
+};
+
+/// Tracer singleton. TraceScopedEvent records its events in the Tracer that TracerSingleton::get() returns.
+using TracerSingleton = Singleton<Tracer>;
+
+/// Convenience class to trace an event. The event begins when the object is constructed and it ends, under the
+/// given name, when the object is destroyed. The event is recorded in the Tracer of the TracerSingleton.
+class TraceScopedEvent
+{
+public:
+	/// @param name The name of the event. Only the pointer is stored so the string should outlive this object.
+	explicit TraceScopedEvent(const char* name)
+		: m_name(name)
+		, m_tracer(&TracerSingleton::get())
+	{
+		// This can't move to the initializer list. m_handle is declared before m_tracer so it would have been
+		// initialized first, through a m_tracer that is not set yet
+		m_handle = m_tracer->beginEvent();
+	}
+
+	// Non-copyable. A copy would end the same event for a second time using the same handle
+	TraceScopedEvent(const TraceScopedEvent&) = delete;
+
+	TraceScopedEvent& operator=(const TraceScopedEvent&) = delete;
+
+	~TraceScopedEvent()
+	{
+		m_tracer->endEvent(m_name, m_handle);
+	}
+
+private:
+	const char* m_name;
+	TracerEventHandle m_handle;
+	Tracer* m_tracer;
+};
+/// @}
+
+} // end namespace anki

+ 52 - 0
tests/util/Tracer.cpp

@@ -0,0 +1,52 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <tests/framework/Framework.h>
+#include <anki/util/Tracer.h>
+#include <anki/util/HighRezTimer.h>
+
+// Drive a Tracer through a few frames, record events and counters and check that flush() reports no error
+ANKI_TEST(Util, Tracer)
+{
+	HeapAllocator<U8> alloc(allocAligned, nullptr);
+	Tracer tracer;
+	tracer.init(alloc);
+
+	// 1st frame (index 0)
+	// Nothing has been recorded yet, flush without any events or counters
+	tracer.newFrame(0);
+	ANKI_TEST_EXPECT_NO_ERR(tracer.flush("./0"));
+
+	// 2nd frame (index 1)
+	// 2 events with the same name
+	tracer.newFrame(1);
+
+	auto handle0 = tracer.beginEvent();
+	HighRezTimer::sleep(0.5);
+	tracer.endEvent("event", handle0);
+
+	auto handle1 = tracer.beginEvent();
+	HighRezTimer::sleep(0.5);
+	tracer.endEvent("event", handle1);
+
+	// 4th frame (index 3). Index 2 is skipped
+	// 2 different events & non zero counter
+	tracer.newFrame(3);
+
+	auto handle2 = tracer.beginEvent();
+	HighRezTimer::sleep(0.5);
+	tracer.endEvent("event", handle2);
+
+	auto handle3 = tracer.beginEvent();
+	HighRezTimer::sleep(0.5);
+	tracer.endEvent("event2", handle3);
+
+	tracer.increaseCounter("counter", 100);
+
+	// 5th frame (index 4)
+	// Only the counter is touched, no events
+	tracer.newFrame(4);
+	tracer.increaseCounter("counter", 150);
+	HighRezTimer::sleep(0.1);
+
+	// NOTE(review): Tracer::flush() is documented as "Don't call that more than once" and this is the second call on
+	// the same tracer. Confirm that this is intended
+	ANKI_TEST_EXPECT_NO_ERR(tracer.flush("./1"));
+}