Browse Source

Complete the tracer

Panagiotis Christopoulos Charitos 7 years ago
parent
commit
e2d045c646

+ 1 - 17
programs/DownscaleBlur.ankiprog

@@ -14,23 +14,7 @@ http://www.anki3d.org/LICENSE
 
 		<shader type="frag">
 			<source><![CDATA[
-#include "shaders/Common.glsl"
-
-layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_tex;
-
-layout(location = 0) in vec2 in_uv;
-layout(location = 0) out vec3 out_color;
-
-void main()
-{
-	out_color = textureLod(u_tex, in_uv, 0.0).rgb;
-	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(+1, +1)).rgb;
-	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(-1, -1)).rgb;
-	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(+1, -1)).rgb;
-	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(-1, +1)).rgb;
-
-	out_color /= 5.0;
-}
+#include "shaders/DownscaleBlur.glsl"
 			]]></source>
 		</shader>
 	</shaders>

+ 19 - 0
programs/DownscaleBlurCompute.ankiprog

@@ -0,0 +1,19 @@
+<!-- 
+Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+All rights reserved.
+Code licensed under the BSD License.
+http://www.anki3d.org/LICENSE
+-->
+<shaderProgram>
+	<shaders>
+		<shader type="comp">
+			<inputs>
+				<input name="WORKGROUP_SIZE" type="uvec2" const="1"/>
+			</inputs>
+
+			<source><![CDATA[
+#include "shaders/DownscaleBlur.glsl"
+			]]></source>
+		</shader>
+	</shaders>
+</shaderProgram>

+ 55 - 0
shaders/DownscaleBlur.glsl

@@ -0,0 +1,55 @@
+// Copyright (C) 2009-2018, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#ifndef ANKI_SHADERS_DOWNSCALE_BLUR_GLSL
+#define ANKI_SHADERS_DOWNSCALE_BLUR_GLSL
+
+#include "shaders/Common.glsl"
+
+layout(ANKI_TEX_BINDING(0, 0)) uniform sampler2D u_tex;
+
+#if defined(ANKI_COMPUTE_SHADER) // Compute path: in_uv and out_color are plain globals instead of stage in/out
+layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
+
+// Push constants hold the size of the output image in .xy. Only .xy is read by this shader (see u_fbSize)
+struct PushConsts
+{
+	uvec4 outImageSizePad2;
+};
+ANKI_PUSH_CONSTANTS(PushConsts, u_regs);
+#	define u_fbSize (u_regs.outImageSizePad2.xy)
+
+vec2 in_uv = (vec2(gl_GlobalInvocationID.xy) + 0.5) / vec2(u_fbSize); // Center of this invocation's output texel
+layout(ANKI_IMAGE_BINDING(0, 0)) writeonly uniform image2D out_img; // Destination, written at the end of main()
+vec3 out_color; // Not a stage output in this path, just holds the result until the imageStore
+#else // Non-compute path: UVs and the color output are regular stage in/out variables
+layout(location = 0) in vec2 in_uv;
+layout(location = 0) out vec3 out_color;
+#endif
+
+void main() // Averages 5 taps of u_tex around in_uv and outputs the result
+{
+#if defined(ANKI_COMPUTE_SHADER)
+	if(gl_GlobalInvocationID.x >= u_fbSize.x || gl_GlobalInvocationID.y >= u_fbSize.y)
+	{
+		// Skip invocations that fall outside the output image, they have nothing to write
+		return;
+	}
+#endif
+
+	out_color = textureLod(u_tex, in_uv, 0.0).rgb; // Center tap, always from LOD 0 of the bound texture
+	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(+1, +1)).rgb; // 4 diagonal taps, one texel away
+	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(-1, -1)).rgb;
+	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(+1, -1)).rgb;
+	out_color += textureLodOffset(u_tex, in_uv, 0.0, ivec2(-1, +1)).rgb;
+
+	out_color *= (1.0 / 5.0); // Equal weights: plain average of the 5 taps
+
+#if defined(ANKI_COMPUTE_SHADER)
+	imageStore(out_img, ivec2(gl_GlobalInvocationID.xy), vec4(out_color, 0.0)); // Alpha is stored as 0.0
+#endif
+}
+
+#endif

+ 3 - 0
src/anki/core/App.cpp

@@ -622,6 +622,7 @@ Error App::mainLoop()
 		static U64 frame = 1;
 		TracerSingleton::get().newFrame(frame++);
 #endif
+		ANKI_TRACE_START_EVENT(FRAME);
 		const Second startTime = HighRezTimer::getCurrentTime();
 
 		prevUpdateTime = crntTime;
@@ -661,6 +662,8 @@ Error App::mainLoop()
 		// Now resume the loader
 		m_resources->getAsyncLoader().resume();
 
+		ANKI_TRACE_STOP_EVENT(FRAME);
+
 		// Sleep
 		const Second endTime = HighRezTimer::getCurrentTime();
 		const Second frameTime = endTime - startTime;

+ 2 - 2
src/anki/core/Trace.h

@@ -11,8 +11,8 @@
 /// @name Trace macros.
 /// @{
 #if ANKI_ENABLE_TRACE
-#	define ANKI_TRACE_START_EVENT(name_) TracerSingleton::get().beginEvent()
-#	define ANKI_TRACE_STOP_EVENT(name_) TracerSingleton::get().endEvent(#	name_)
+#	define ANKI_TRACE_START_EVENT(name_) TracerEventHandle _teh##name_ = TracerSingleton::get().beginEvent()
+#	define ANKI_TRACE_STOP_EVENT(name_) TracerSingleton::get().endEvent(#	name_, _teh##name_)
 #	define ANKI_TRACE_SCOPED_EVENT(name_) TraceScopedEvent _tse##name_(#	name_)
 #	define ANKI_TRACE_INC_COUNTER(name_, val_) TracerSingleton::get().increaseCounter(#	name_, val_)
 #else

+ 1 - 3
src/anki/gr/gl/CommandBufferImpl.cpp

@@ -43,7 +43,7 @@ void CommandBufferImpl::init(const CommandBufferInitInfo& init)
 
 void CommandBufferImpl::destroy()
 {
-	ANKI_TRACE_START_EVENT(GL_CMD_BUFFER_DESTROY);
+	ANKI_TRACE_SCOPED_EVENT(GL_CMD_BUFFER_DESTROY);
 
 #if ANKI_EXTRA_CHECKS
 	if(!m_executed && m_firstCommand)
@@ -64,8 +64,6 @@ void CommandBufferImpl::destroy()
 				&& "Someone is holding a reference to the command buffer's allocator");
 
 	m_alloc = CommandBufferAllocator<U8>();
-
-	ANKI_TRACE_STOP_EVENT(GL_CMD_BUFFER_DESTROY);
 }
 
 Error CommandBufferImpl::executeAllCommands()

+ 7 - 8
src/anki/gr/gl/RenderingThread.cpp

@@ -248,9 +248,11 @@ void RenderingThread::threadLoop()
 			++m_head;
 		}
 
-		ANKI_TRACE_START_EVENT(GL_THREAD);
-		Error err = static_cast<CommandBufferImpl&>(*cmd).executeAllCommands();
-		ANKI_TRACE_STOP_EVENT(GL_THREAD);
+		Error err = Error::NONE;
+		{
+			ANKI_TRACE_SCOPED_EVENT(GL_THREAD);
+			err = static_cast<CommandBufferImpl&>(*cmd).executeAllCommands();
+		}
 
 		if(err)
 		{
@@ -274,7 +276,7 @@ void RenderingThread::syncClientServer()
 
 void RenderingThread::swapBuffersInternal()
 {
-	ANKI_TRACE_START_EVENT(SWAP_BUFFERS);
+	ANKI_TRACE_SCOPED_EVENT(SWAP_BUFFERS);
 
 	// Do the swap buffers
 	m_manager->swapBuffers();
@@ -286,13 +288,11 @@ void RenderingThread::swapBuffersInternal()
 
 		m_frameCondVar.notifyOne();
 	}
-
-	ANKI_TRACE_STOP_EVENT(SWAP_BUFFERS);
 }
 
 void RenderingThread::swapBuffers()
 {
-	ANKI_TRACE_START_EVENT(SWAP_BUFFERS);
+	ANKI_TRACE_SCOPED_EVENT(SWAP_BUFFERS);
 	// Wait for the rendering thread to finish swap buffers...
 	{
 		LockGuard<Mutex> lock(m_frameMtx);
@@ -306,7 +306,6 @@ void RenderingThread::swapBuffers()
 
 	// ...and then flush a new swap buffers
 	flushCommandBuffer(m_swapBuffersCommands, nullptr);
-	ANKI_TRACE_STOP_EVENT(SWAP_BUFFERS);
 }
 
 } // end namespace anki

+ 4 - 3
src/anki/gr/vulkan/BufferImpl.cpp

@@ -133,9 +133,10 @@ Error BufferImpl::init(const BufferInitInfo& inf)
 	getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, req.alignment, true, m_memHandle);
 
 	// Bind mem to buffer
-	ANKI_TRACE_START_EVENT(VK_BIND_OBJECT);
-	ANKI_VK_CHECK(vkBindBufferMemory(getDevice(), m_handle, m_memHandle.m_memory, m_memHandle.m_offset));
-	ANKI_TRACE_STOP_EVENT(VK_BIND_OBJECT);
+	{
+		ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
+		ANKI_VK_CHECK(vkBindBufferMemory(getDevice(), m_handle, m_memHandle.m_memory, m_memHandle.m_offset));
+	}
 
 	m_access = access;
 	m_size = inf.m_size;

+ 1 - 3
src/anki/gr/vulkan/DescriptorSet.cpp

@@ -621,14 +621,13 @@ Error DescriptorSetFactory::newDescriptorSet(ThreadId tid,
 	Array<U32, MAX_UNIFORM_BUFFER_BINDINGS + MAX_STORAGE_BUFFER_BINDINGS>& dynamicOffsets,
 	U& dynamicOffsetCount)
 {
-	ANKI_TRACE_START_EVENT(VK_DESCRIPTOR_SET_GET_OR_CREATE);
+	ANKI_TRACE_SCOPED_EVENT(VK_DESCRIPTOR_SET_GET_OR_CREATE);
 
 	U64 hash;
 	state.flush(dirty, hash, dynamicOffsets, dynamicOffsetCount);
 
 	if(!dirty)
 	{
-		ANKI_TRACE_STOP_EVENT(VK_DESCRIPTOR_SET_GET_OR_CREATE);
 		return Error::NONE;
 	}
 
@@ -645,7 +644,6 @@ Error DescriptorSetFactory::newDescriptorSet(ThreadId tid,
 	set.m_handle = s->m_handle;
 	ANKI_ASSERT(set.m_handle != VK_NULL_HANDLE);
 
-	ANKI_TRACE_STOP_EVENT(VK_DESCRIPTOR_SET_GET_OR_CREATE);
 	return Error::NONE;
 }
 

+ 15 - 11
src/anki/gr/vulkan/GrManagerImpl.cpp

@@ -684,14 +684,15 @@ void GrManagerImpl::beginFrame()
 
 	// Get new image
 	uint32_t imageIdx;
-	ANKI_TRACE_START_EVENT(VK_ACQUIRE_IMAGE);
-	ANKI_VK_CHECKF(vkAcquireNextImageKHR(m_device,
-		m_crntSwapchain->m_swapchain,
-		UINT64_MAX,
-		frame.m_acquireSemaphore->getHandle(),
-		fence->getHandle(),
-		&imageIdx));
-	ANKI_TRACE_STOP_EVENT(VK_ACQUIRE_IMAGE);
+	{
+		ANKI_TRACE_SCOPED_EVENT(VK_ACQUIRE_IMAGE);
+		ANKI_VK_CHECKF(vkAcquireNextImageKHR(m_device,
+			m_crntSwapchain->m_swapchain,
+			UINT64_MAX,
+			frame.m_acquireSemaphore->getHandle(),
+			fence->getHandle(),
+			&imageIdx));
+	}
 
 	ANKI_ASSERT(imageIdx < MAX_FRAMES_IN_FLIGHT);
 	m_crntSwapchain->m_currentBackbufferIndex = imageIdx;
@@ -699,6 +700,8 @@ void GrManagerImpl::beginFrame()
 
 void GrManagerImpl::endFrame()
 {
+	ANKI_TRACE_SCOPED_EVENT(VK_PRESENT);
+
 	LockGuard<Mutex> lock(m_globalMtx);
 
 	PerFrame& frame = m_perFrame[m_frame % MAX_FRAMES_IN_FLIGHT];
@@ -804,9 +807,10 @@ void GrManagerImpl::flushCommandBuffer(CommandBufferPtr cmdb, FencePtr* outFence
 
 	impl.setFence(fence);
 
-	ANKI_TRACE_START_EVENT(VK_QUEUE_SUBMIT);
-	ANKI_VK_CHECKF(vkQueueSubmit(m_queue, 1, &submit, fence->getHandle()));
-	ANKI_TRACE_STOP_EVENT(VK_QUEUE_SUBMIT);
+	{
+		ANKI_TRACE_SCOPED_EVENT(VK_QUEUE_SUBMIT);
+		ANKI_VK_CHECKF(vkQueueSubmit(m_queue, 1, &submit, fence->getHandle()));
+	}
 
 	if(wait)
 	{

+ 5 - 3
src/anki/gr/vulkan/Pipeline.cpp

@@ -446,9 +446,11 @@ void PipelineFactory::newPipeline(PipelineStateTracker& state, Pipeline& ppline,
 		const VkGraphicsPipelineCreateInfo& ci = state.updatePipelineCreateInfo();
 		pp.m_fb = state.getFb();
 
-		ANKI_TRACE_START_EVENT(VK_PIPELINE_CREATE);
-		ANKI_VK_CHECKF(vkCreateGraphicsPipelines(m_dev, m_pplineCache, 1, &ci, nullptr, &pp.m_handle));
-		ANKI_TRACE_STOP_EVENT(VK_PIPELINE_CREATE);
+		{
+			ANKI_TRACE_SCOPED_EVENT(VK_PIPELINE_CREATE);
+			ANKI_VK_CHECKF(vkCreateGraphicsPipelines(m_dev, m_pplineCache, 1, &ci, nullptr, &pp.m_handle));
+		}
+
 		ANKI_TRACE_INC_COUNTER(VK_PIPELINE_CREATE, 1);
 
 		m_pplines.emplace(m_alloc, hash, pp);

+ 2 - 4
src/anki/gr/vulkan/TextureImpl.cpp

@@ -327,9 +327,8 @@ Error TextureImpl::initImage(const TextureInitInfo& init_)
 		getGrManagerImpl().getGpuMemoryManager().allocateMemory(memIdx, req.size, req.alignment, false, m_memHandle);
 
 		// Bind mem to image
-		ANKI_TRACE_START_EVENT(VK_BIND_OBJECT);
+		ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
 		ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_memHandle.m_memory, m_memHandle.m_offset));
-		ANKI_TRACE_STOP_EVENT(VK_BIND_OBJECT);
 	}
 	else
 	{
@@ -347,9 +346,8 @@ Error TextureImpl::initImage(const TextureInitInfo& init_)
 		getGrManagerImpl().trySetVulkanHandleName(
 			init.getName(), VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, ptrToNumber(m_dedicatedMem));
 
-		ANKI_TRACE_START_EVENT(VK_BIND_OBJECT);
+		ANKI_TRACE_SCOPED_EVENT(VK_BIND_OBJECT);
 		ANKI_VK_CHECK(vkBindImageMemory(getDevice(), m_imageHandle, m_dedicatedMem, 0));
-		ANKI_TRACE_STOP_EVENT(VK_BIND_OBJECT);
 	}
 
 	return Error::NONE;

+ 109 - 36
src/anki/renderer/DownscaleBlur.cpp

@@ -34,26 +34,49 @@ Error DownscaleBlur::initInternal(const ConfigSet&)
 	// Create the miped texture
 	TextureInitInfo texinit = m_r->create2DRenderTargetDescription(
 		m_r->getWidth() / 2, m_r->getHeight() / 2, LIGHT_SHADING_COLOR_ATTACHMENT_PIXEL_FORMAT, "DownscaleBlur");
-	texinit.m_usage = TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE
-					  | TextureUsageBit::SAMPLED_COMPUTE;
+	texinit.m_usage = TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::SAMPLED_COMPUTE
+					  | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE; // TODO remove FB_WRITE
+	if(m_useCompute)
+	{
+		texinit.m_usage |= TextureUsageBit::SAMPLED_COMPUTE | TextureUsageBit::IMAGE_COMPUTE_WRITE;
+	}
+	else
+	{
+		texinit.m_usage |= TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE;
+	}
 	texinit.m_mipmapCount = m_passCount;
 	texinit.m_initialUsage = TextureUsageBit::SAMPLED_COMPUTE;
 	m_rtTex = m_r->createAndClearRenderTarget(texinit);
 
 	// FB descr
-	m_fbDescrs.create(getAllocator(), m_passCount);
-	for(U pass = 0; pass < m_passCount; ++pass)
+	if(!m_useCompute)
 	{
-		m_fbDescrs[pass].m_colorAttachmentCount = 1;
-		m_fbDescrs[pass].m_colorAttachments[0].m_loadOperation = AttachmentLoadOperation::DONT_CARE;
-		m_fbDescrs[pass].m_colorAttachments[0].m_surface.m_level = pass;
-		m_fbDescrs[pass].bake();
+		m_fbDescrs.create(getAllocator(), m_passCount);
+		for(U pass = 0; pass < m_passCount; ++pass)
+		{
+			m_fbDescrs[pass].m_colorAttachmentCount = 1;
+			m_fbDescrs[pass].m_colorAttachments[0].m_loadOperation = AttachmentLoadOperation::DONT_CARE;
+			m_fbDescrs[pass].m_colorAttachments[0].m_surface.m_level = pass;
+			m_fbDescrs[pass].bake();
+		}
 	}
 
 	// Shader programs
-	ANKI_CHECK(getResourceManager().loadResource("programs/DownscaleBlur.ankiprog", m_prog));
-	const ShaderProgramResourceVariant* variant;
-	m_prog->getOrCreateVariant(variant);
+	const ShaderProgramResourceVariant* variant = nullptr;
+	if(m_useCompute)
+	{
+		ANKI_CHECK(getResourceManager().loadResource("programs/DownscaleBlurCompute.ankiprog", m_prog));
+
+		ShaderProgramResourceConstantValueInitList<1> consts(m_prog);
+		consts.add("WORKGROUP_SIZE", UVec2(m_workgroupSize[0], m_workgroupSize[1]));
+
+		m_prog->getOrCreateVariant(consts.get(), variant);
+	}
+	else
+	{
+		ANKI_CHECK(getResourceManager().loadResource("programs/DownscaleBlur.ankiprog", m_prog));
+		m_prog->getOrCreateVariant(variant);
+	}
 	m_grProg = variant->getProgram();
 
 	return Error::NONE;
@@ -79,35 +102,68 @@ void DownscaleBlur::populateRenderGraph(RenderingContext& ctx)
 		"Down/Blur #5",
 		"Down/Blur #6",
 		"Down/Blur #7"}};
-	for(U i = 0; i < m_passCount; ++i)
+	if(m_useCompute)
 	{
-		GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passNames[i]);
-		pass.setWork(runCallback, this, 0);
-
-		if(i > 0)
+		for(U i = 0; i < m_passCount; ++i)
 		{
-			TextureSubresourceInfo sampleSubresource;
-			TextureSubresourceInfo renderSubresource;
+			ComputeRenderPassDescription& pass = rgraph.newComputeRenderPass(passNames[i]);
+			pass.setWork(runCallback, this, 0);
+
+			if(i > 0)
+			{
+				TextureSubresourceInfo sampleSubresource;
+				TextureSubresourceInfo renderSubresource;
 
-			sampleSubresource.m_firstMipmap = i - 1;
-			renderSubresource.m_firstMipmap = i;
+				sampleSubresource.m_firstMipmap = i - 1;
+				renderSubresource.m_firstMipmap = i;
 
-			pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
-			pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::SAMPLED_FRAGMENT, sampleSubresource});
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::SAMPLED_COMPUTE, sampleSubresource});
 
-			pass.newProducer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+				pass.newProducer({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
+			}
+			else
+			{
+				TextureSubresourceInfo renderSubresource;
+
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
+				pass.newConsumer({m_r->getTemporalAA().getRt(), TextureUsageBit::SAMPLED_COMPUTE});
+
+				pass.newProducer({m_runCtx.m_rt, TextureUsageBit::IMAGE_COMPUTE_WRITE, renderSubresource});
+			}
 		}
-		else
+	}
+	else
+	{
+		for(U i = 0; i < m_passCount; ++i)
 		{
-			TextureSubresourceInfo renderSubresource;
+			GraphicsRenderPassDescription& pass = rgraph.newGraphicsRenderPass(passNames[i]);
+			pass.setWork(runCallback, this, 0);
+			pass.setFramebufferInfo(m_fbDescrs[i], {{m_runCtx.m_rt}}, {});
 
-			pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
-			pass.newConsumer({m_r->getTemporalAA().getRt(), TextureUsageBit::SAMPLED_FRAGMENT});
+			if(i > 0)
+			{
+				TextureSubresourceInfo sampleSubresource;
+				TextureSubresourceInfo renderSubresource;
 
-			pass.newProducer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
-		}
+				sampleSubresource.m_firstMipmap = i - 1;
+				renderSubresource.m_firstMipmap = i;
+
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::SAMPLED_FRAGMENT, sampleSubresource});
+
+				pass.newProducer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+			}
+			else
+			{
+				TextureSubresourceInfo renderSubresource;
 
-		pass.setFramebufferInfo(m_fbDescrs[i], {{m_runCtx.m_rt}}, {});
+				pass.newConsumer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+				pass.newConsumer({m_r->getTemporalAA().getRt(), TextureUsageBit::SAMPLED_FRAGMENT});
+
+				pass.newProducer({m_runCtx.m_rt, TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE, renderSubresource});
+			}
+		}
 	}
 }
 
@@ -115,15 +171,16 @@ void DownscaleBlur::run(RenderPassWorkContext& rgraphCtx)
 {
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
+	cmdb->bindShaderProgram(m_grProg);
+
 	const U passIdx = m_runCtx.m_crntPassIdx++;
+	const U vpWidth = m_rtTex->getWidth() >> passIdx;
+	const U vpHeight = m_rtTex->getHeight() >> passIdx;
 
 	if(passIdx > 0)
 	{
-		// Bind the Rt
-
 		TextureSubresourceInfo sampleSubresource;
 		sampleSubresource.m_firstMipmap = passIdx - 1;
-
 		rgraphCtx.bindTextureAndSampler(0, 0, m_runCtx.m_rt, sampleSubresource, m_r->getLinearSampler());
 	}
 	else
@@ -131,9 +188,25 @@ void DownscaleBlur::run(RenderPassWorkContext& rgraphCtx)
 		rgraphCtx.bindColorTextureAndSampler(0, 0, m_r->getTemporalAA().getRt(), m_r->getLinearSampler());
 	}
 
-	cmdb->setViewport(0, 0, m_rtTex->getWidth() >> passIdx, m_rtTex->getHeight() >> passIdx);
-	cmdb->bindShaderProgram(m_grProg);
-	drawQuad(cmdb);
+	if(m_useCompute)
+	{
+		TextureSubresourceInfo writeSubresource; // The mip this pass writes through the image binding
+		writeSubresource.m_firstMipmap = passIdx;
+		rgraphCtx.bindImage(0, 0, m_runCtx.m_rt, writeSubresource);
+
+		const UVec4 fbSize(vpWidth, vpHeight, 0, 0); // Unsigned ints: the shader declares this as a uvec4
+		cmdb->setPushConstants(&fbSize, sizeof(fbSize));
+	}
+
+	if(m_useCompute)
+	{
+		dispatchPPCompute(cmdb, m_workgroupSize[0], m_workgroupSize[1], vpWidth, vpHeight);
+	}
+	else
+	{
+		cmdb->setViewport(0, 0, vpWidth, vpHeight);
+		drawQuad(cmdb);
+	}
 }
 
 } // end namespace anki

+ 3 - 0
src/anki/renderer/DownscaleBlur.h

@@ -53,6 +53,9 @@ anki_internal:
 	}
 
 private:
+	static const Bool m_useCompute = false;
+	Array<U32, 2> m_workgroupSize = {{8, 8}};
+
 	U8 m_passCount = 0; ///< It's also the mip count of the m_rtTex.
 
 	TexturePtr m_rtTex;

+ 1 - 1
src/anki/renderer/Drawer.cpp

@@ -79,7 +79,7 @@ void RenderableDrawer::flushDrawcall(DrawContext& ctx)
 	// Rendered something, reset the cached transforms
 	if(ctx.m_cachedRenderElementCount > 1)
 	{
-		ANKI_TRACE_INC_COUNTER(RENDERER_MERGED_DRAWCALLS, ctx.m_cachedRenderElementCount - 1);
+		ANKI_TRACE_INC_COUNTER(R_MERGED_DRAWCALLS, ctx.m_cachedRenderElementCount - 1);
 	}
 	ctx.m_cachedRenderElementCount = 0;
 }

+ 2 - 2
src/anki/renderer/GBuffer.cpp

@@ -70,7 +70,7 @@ Error GBuffer::initInternal(const ConfigSet& initializer)
 
 void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rgraphCtx) const
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDER_MS);
+	ANKI_TRACE_SCOPED_EVENT(R_MS);
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	const U threadId = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
@@ -136,7 +136,7 @@ void GBuffer::runInThread(const RenderingContext& ctx, RenderPassWorkContext& rg
 
 void GBuffer::populateRenderGraph(RenderingContext& ctx)
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDER_MS);
+	ANKI_TRACE_SCOPED_EVENT(R_MS);
 
 	m_ctx = &ctx;
 	RenderGraphDescription& rgraph = ctx.m_renderGraphDescr;

+ 5 - 5
src/anki/renderer/Indirect.cpp

@@ -348,7 +348,7 @@ void Indirect::prepareProbes(
 void Indirect::runGBuffer(CommandBufferPtr& cmdb)
 {
 	ANKI_ASSERT(m_ctx.m_probe);
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 	const ReflectionProbeQueueElement& probe = *m_ctx.m_probe;
 
 	// For each face
@@ -404,7 +404,7 @@ void Indirect::bindVertexIndexBuffers(MeshResourcePtr& mesh, CommandBufferPtr& c
 void Indirect::runLightShading(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 {
 	ANKI_ASSERT(faceIdx <= 6);
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
@@ -525,7 +525,7 @@ void Indirect::runMipmappingOfLightShading(U32 faceIdx, RenderPassWorkContext& r
 	ANKI_ASSERT(faceIdx < 6);
 	ANKI_ASSERT(m_ctx.m_cacheEntryIdx < m_cacheEntries.getSize());
 
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 
 	TextureSubresourceInfo subresource(TextureSurfaceInfo(0, 0, faceIdx, m_ctx.m_cacheEntryIdx));
 	subresource.m_mipmapCount = m_lightShading.m_mipCount;
@@ -541,7 +541,7 @@ void Indirect::runMipmappingOfLightShading(U32 faceIdx, RenderPassWorkContext& r
 void Indirect::runIrradiance(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 {
 	ANKI_ASSERT(faceIdx < 6);
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 	const U32 cacheEntryIdx = m_ctx.m_cacheEntryIdx;
 	ANKI_ASSERT(cacheEntryIdx < m_cacheEntries.getSize());
 
@@ -563,7 +563,7 @@ void Indirect::runIrradiance(U32 faceIdx, RenderPassWorkContext& rgraphCtx)
 
 void Indirect::populateRenderGraph(RenderingContext& rctx)
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDER_IR);
+	ANKI_TRACE_SCOPED_EVENT(R_IR);
 
 #if ANKI_EXTRA_CHECKS
 	m_ctx = {};

+ 132 - 126
src/anki/renderer/LightBin.cpp

@@ -19,13 +19,13 @@ const U SIZE_IDX_COUNT = 4;
 // Shader structs and block representations. All positions and directions in viewspace
 // For documentation see the shaders
 
-class ShaderCluster
+class LightBin::ShaderCluster
 {
 public:
 	U32 m_firstIdx;
 };
 
-class ShaderPointLight
+class LightBin::ShaderPointLight
 {
 public:
 	Vec4 m_posRadius;
@@ -34,7 +34,7 @@ public:
 	UVec2 m_atlasTiles;
 };
 
-class ShaderSpotLight
+class LightBin::ShaderSpotLight
 {
 public:
 	Vec4 m_posRadius;
@@ -44,7 +44,7 @@ public:
 	Mat4 m_texProjectionMat; ///< Texture projection matrix
 };
 
-class ShaderProbe
+class LightBin::ShaderProbe
 {
 public:
 	Vec3 m_pos;
@@ -59,7 +59,7 @@ public:
 	}
 };
 
-class ShaderDecal
+class LightBin::ShaderDecal
 {
 public:
 	Vec4 m_diffUv;
@@ -73,7 +73,7 @@ static const U MAX_PROBES_PER_CLUSTER = 12;
 static const U MAX_DECALS_PER_CLUSTER = 8;
 static const F32 INVALID_TEXTURE_INDEX = -1.0;
 
-class ClusterLightIndex
+class LightBin::ClusterLightIndex
 {
 public:
 	ClusterLightIndex()
@@ -92,23 +92,24 @@ public:
 		m_index = i;
 	}
 
+	friend Bool operator<(const ClusterLightIndex& a, const ClusterLightIndex& b)
+	{
+		return a.getIndex() < b.getIndex();
+	}
+
 private:
 	U16 m_index;
 };
 
-static Bool operator<(const ClusterLightIndex& a, const ClusterLightIndex& b)
-{
-	return a.getIndex() < b.getIndex();
-}
-
 /// Store the probe radius for sorting the indices.
 /// WARNING: Keep it as small as possible, that's why the members are U16
-class ClusterProbeIndex
+class LightBin::ClusterProbeIndex
 {
 public:
 	ClusterProbeIndex()
 	{
 		// Do nothing. No need to initialize
+		static_assert(sizeof(ClusterProbeIndex) == sizeof(U16) * 2, "Because we memcmp");
 	}
 
 	U getIndex() const
@@ -144,10 +145,9 @@ private:
 	U16 m_index;
 	U16 m_probeRadius;
 };
-static_assert(sizeof(ClusterProbeIndex) == sizeof(U16) * 2, "Because we memcmp");
 
 /// WARNING: Keep it as small as possible. The number of clusters is huge
-class alignas(U32) ClusterData
+class alignas(U32) LightBin::ClusterData
 {
 public:
 	Atomic<U8> m_pointCount;
@@ -271,10 +271,10 @@ private:
 };
 
 /// Common data for all tasks.
-class LightBinContext
+class LightBin::BinContext
 {
 public:
-	LightBinContext(StackAllocator<U8> alloc)
+	BinContext(StackAllocator<U8> alloc)
 		: m_alloc(alloc)
 		, m_tempClusters(alloc)
 	{
@@ -326,10 +326,10 @@ public:
 };
 
 /// Write the lights to the GPU buffers.
-class WriteLightsTask : public ThreadPoolTask
+class LightBin::WriteLightsTask : public ThreadPoolTask
 {
 public:
-	LightBinContext* m_ctx = nullptr;
+	BinContext* m_ctx = nullptr;
 
 	Error operator()(U32 threadId, PtrSize threadsCount)
 	{
@@ -367,7 +367,7 @@ Error LightBin::bin(const Mat4& viewMat,
 	Bool shadowsEnabled,
 	LightBinOut& out)
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDERER_LIGHT_BINNING);
+	ANKI_TRACE_SCOPED_EVENT(R_LIGHT_BINNING);
 
 	// Prepare the clusterer
 	ClustererPrepareInfo pinf;
@@ -387,13 +387,13 @@ Error LightBin::bin(const Mat4& viewMat,
 	const U visibleProbeCount = rqueue.m_reflectionProbes.getSize();
 	const U visibleDecalCount = rqueue.m_decals.getSize();
 
-	ANKI_TRACE_INC_COUNTER(RENDERER_LIGHTS, visiblePointLightsCount + visibleSpotLightsCount);
+	ANKI_TRACE_INC_COUNTER(R_LIGHTS, visiblePointLightsCount + visibleSpotLightsCount);
 
 	//
 	// Write the lights and tiles UBOs
 	//
 	Array<WriteLightsTask, ThreadPool::MAX_THREADS> tasks;
-	LightBinContext ctx(frameAlloc);
+	BinContext ctx(frameAlloc);
 	ctx.m_viewMat = viewMat;
 	ctx.m_viewProjMat = viewProjMat;
 	ctx.m_camTrf = camTrf;
@@ -498,157 +498,163 @@ Error LightBin::bin(const Mat4& viewMat,
 	return Error::NONE;
 }
 
-void LightBin::binLights(U32 threadId, PtrSize threadsCount, LightBinContext& ctx)
+void LightBin::binLights(U32 threadId, PtrSize threadsCount, BinContext& ctx)
 {
-	ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
 	U clusterCount = m_clusterCount;
 	PtrSize start, end;
 
 	//
 	// Initialize the temp clusters
 	//
-	ThreadPoolTask::choseStartEnd(threadId, threadsCount, clusterCount, start, end);
-
-	for(U i = start; i < end; ++i)
 	{
-		ctx.m_tempClusters[i].reset();
+		ANKI_TRACE_SCOPED_EVENT(R_LIGHT_BINNING);
+
+		ThreadPoolTask::choseStartEnd(threadId, threadsCount, clusterCount, start, end);
+
+		for(U i = start; i < end; ++i)
+		{
+			ctx.m_tempClusters[i].reset();
+		}
 	}
 
-	ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
 	m_barrier.wait();
-	ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
 
 	//
 	// Iterate lights and probes and bin them
 	//
-	ClustererTestResult testResult;
-	m_clusterer.initTestResults(ctx.m_alloc, testResult);
-	U lightCount = ctx.m_vPointLights.getSize() + ctx.m_vSpotLights.getSize();
-	U totalCount = lightCount + ctx.m_vProbes.getSize() + ctx.m_vDecals.getSize();
-
-	const U TO_BIN_COUNT = 1;
-	while((start = ctx.m_count2.fetchAdd(TO_BIN_COUNT)) < totalCount)
 	{
-		end = min<U>(start + TO_BIN_COUNT, totalCount);
+		ANKI_TRACE_SCOPED_EVENT(R_LIGHT_BINNING);
 
-		for(U j = start; j < end; ++j)
+		ClustererTestResult testResult;
+		m_clusterer.initTestResults(ctx.m_alloc, testResult);
+		U lightCount = ctx.m_vPointLights.getSize() + ctx.m_vSpotLights.getSize();
+		U totalCount = lightCount + ctx.m_vProbes.getSize() + ctx.m_vDecals.getSize();
+
+		const U TO_BIN_COUNT = 1;
+		while((start = ctx.m_count2.fetchAdd(TO_BIN_COUNT)) < totalCount)
 		{
-			if(j >= lightCount + ctx.m_vDecals.getSize())
-			{
-				U i = j - (lightCount + ctx.m_vDecals.getSize());
-				writeAndBinProbe(ctx.m_vProbes[i], ctx, testResult);
-			}
-			else if(j >= ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize())
-			{
-				U i = j - (ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize());
-				writeAndBinSpotLight(ctx.m_vSpotLights[i], ctx, testResult);
-			}
-			else if(j >= ctx.m_vDecals.getSize())
-			{
-				U i = j - ctx.m_vDecals.getSize();
-				writeAndBinPointLight(ctx.m_vPointLights[i], ctx, testResult);
-			}
-			else
+			end = min<U>(start + TO_BIN_COUNT, totalCount);
+
+			for(U j = start; j < end; ++j)
 			{
-				U i = j;
-				writeAndBinDecal(ctx.m_vDecals[i], ctx, testResult);
+				if(j >= lightCount + ctx.m_vDecals.getSize())
+				{
+					U i = j - (lightCount + ctx.m_vDecals.getSize());
+					writeAndBinProbe(ctx.m_vProbes[i], ctx, testResult);
+				}
+				else if(j >= ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize())
+				{
+					U i = j - (ctx.m_vPointLights.getSize() + ctx.m_vDecals.getSize());
+					writeAndBinSpotLight(ctx.m_vSpotLights[i], ctx, testResult);
+				}
+				else if(j >= ctx.m_vDecals.getSize())
+				{
+					U i = j - ctx.m_vDecals.getSize();
+					writeAndBinPointLight(ctx.m_vPointLights[i], ctx, testResult);
+				}
+				else
+				{
+					U i = j;
+					writeAndBinDecal(ctx.m_vDecals[i], ctx, testResult);
+				}
 			}
 		}
 	}
 
+	m_barrier.wait();
+
 	//
 	// Last thing, update the real clusters
 	//
-	ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
-	m_barrier.wait();
-	ANKI_TRACE_START_EVENT(RENDERER_LIGHT_BINNING);
 
-	// Run per cluster
-	const U CLUSTER_GROUP = 16;
-	while((start = ctx.m_count.fetchAdd(CLUSTER_GROUP)) < clusterCount)
 	{
-		end = min<U>(start + CLUSTER_GROUP, clusterCount);
+		ANKI_TRACE_SCOPED_EVENT(R_LIGHT_BINNING);
 
-		for(U i = start; i < end; ++i)
+		// Run per cluster
+		const U CLUSTER_GROUP = 16;
+		while((start = ctx.m_count.fetchAdd(CLUSTER_GROUP)) < clusterCount)
 		{
-			auto& cluster = ctx.m_tempClusters[i];
-			cluster.normalizeCounts();
+			end = min<U>(start + CLUSTER_GROUP, clusterCount);
 
-			const U countP = cluster.m_pointCount.get();
-			const U countS = cluster.m_spotCount.get();
-			const U countProbe = cluster.m_probeCount.get();
-			const U countDecal = cluster.m_decalCount.get();
-			const U count = countP + countS + countProbe + countDecal;
-
-			auto& c = ctx.m_clusters[i];
-			c.m_firstIdx = 0; // Point to the first empty indices
-
-			// Early exit
-			if(ANKI_UNLIKELY(count == 0))
+			for(U i = start; i < end; ++i)
 			{
-				continue;
-			}
+				auto& cluster = ctx.m_tempClusters[i];
+				cluster.normalizeCounts();
 
-			// Check if the previous cluster contains the same lights as this one and if yes then merge them. This will
-			// avoid allocating new IDs (and thrashing GPU caches).
-			cluster.sortLightIds();
-			if(i != start)
-			{
-				const auto& clusterB = ctx.m_tempClusters[i - 1];
+				const U countP = cluster.m_pointCount.get();
+				const U countS = cluster.m_spotCount.get();
+				const U countProbe = cluster.m_probeCount.get();
+				const U countDecal = cluster.m_decalCount.get();
+				const U count = countP + countS + countProbe + countDecal;
+
+				auto& c = ctx.m_clusters[i];
+				c.m_firstIdx = 0; // Point to the first empty indices
 
-				if(cluster == clusterB)
+				// Early exit
+				if(ANKI_UNLIKELY(count == 0))
 				{
-					c.m_firstIdx = ctx.m_clusters[i - 1].m_firstIdx;
 					continue;
 				}
-			}
 
-			U offset = ctx.m_lightIdsCount.fetchAdd(count + SIZE_IDX_COUNT);
-			U initialOffset = offset;
-			(void)initialOffset;
-
-			if(offset + count + SIZE_IDX_COUNT <= ctx.m_maxLightIndices)
-			{
-				c.m_firstIdx = offset;
-
-				ctx.m_lightIds[offset++] = countDecal;
-				for(U i = 0; i < countDecal; ++i)
+				// Check if the previous cluster contains the same lights as this one and if yes then merge them. This
+				// will avoid allocating new IDs (and thrashing GPU caches).
+				cluster.sortLightIds();
+				if(i != start)
 				{
-					ctx.m_lightIds[offset++] = cluster.m_decalIds[i].getIndex();
-				}
+					const auto& clusterB = ctx.m_tempClusters[i - 1];
 
-				ctx.m_lightIds[offset++] = countP;
-				for(U i = 0; i < countP; ++i)
-				{
-					ctx.m_lightIds[offset++] = cluster.m_pointIds[i].getIndex();
+					if(cluster == clusterB)
+					{
+						c.m_firstIdx = ctx.m_clusters[i - 1].m_firstIdx;
+						continue;
+					}
 				}
 
-				ctx.m_lightIds[offset++] = countS;
-				for(U i = 0; i < countS; ++i)
+				U offset = ctx.m_lightIdsCount.fetchAdd(count + SIZE_IDX_COUNT);
+				U initialOffset = offset;
+				(void)initialOffset;
+
+				if(offset + count + SIZE_IDX_COUNT <= ctx.m_maxLightIndices)
 				{
-					ctx.m_lightIds[offset++] = cluster.m_spotIds[i].getIndex();
+					c.m_firstIdx = offset;
+
+					ctx.m_lightIds[offset++] = countDecal;
+					for(U i = 0; i < countDecal; ++i)
+					{
+						ctx.m_lightIds[offset++] = cluster.m_decalIds[i].getIndex();
+					}
+
+					ctx.m_lightIds[offset++] = countP;
+					for(U i = 0; i < countP; ++i)
+					{
+						ctx.m_lightIds[offset++] = cluster.m_pointIds[i].getIndex();
+					}
+
+					ctx.m_lightIds[offset++] = countS;
+					for(U i = 0; i < countS; ++i)
+					{
+						ctx.m_lightIds[offset++] = cluster.m_spotIds[i].getIndex();
+					}
+
+					ctx.m_lightIds[offset++] = countProbe;
+					for(U i = 0; i < countProbe; ++i)
+					{
+						ctx.m_lightIds[offset++] = cluster.m_probeIds[i].getIndex();
+					}
+
+					ANKI_ASSERT(offset - initialOffset == count + SIZE_IDX_COUNT);
 				}
-
-				ctx.m_lightIds[offset++] = countProbe;
-				for(U i = 0; i < countProbe; ++i)
+				else
 				{
-					ctx.m_lightIds[offset++] = cluster.m_probeIds[i].getIndex();
+					ANKI_R_LOGW("Light IDs buffer too small");
 				}
-
-				ANKI_ASSERT(offset - initialOffset == count + SIZE_IDX_COUNT);
-			}
-			else
-			{
-				ANKI_R_LOGW("Light IDs buffer too small");
-			}
-		} // end for
-	} // end while
-
-	ANKI_TRACE_STOP_EVENT(RENDERER_LIGHT_BINNING);
+			} // end for
+		} // end while
+	} // scope
 }
 
 void LightBin::writeAndBinPointLight(
-	const PointLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult)
+	const PointLightQueueElement& lightEl, BinContext& ctx, ClustererTestResult& testResult)
 {
 	// Get GPU light
 	I idx = ctx.m_pointLightsCount.fetchAdd(1);
@@ -694,7 +700,7 @@ void LightBin::writeAndBinPointLight(
 }
 
 void LightBin::writeAndBinSpotLight(
-	const SpotLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult)
+	const SpotLightQueueElement& lightEl, BinContext& ctx, ClustererTestResult& testResult)
 {
 	I idx = ctx.m_spotLightsCount.fetchAdd(1);
 
@@ -748,7 +754,7 @@ void LightBin::writeAndBinSpotLight(
 }
 
 void LightBin::writeAndBinProbe(
-	const ReflectionProbeQueueElement& probeEl, LightBinContext& ctx, ClustererTestResult& testResult)
+	const ReflectionProbeQueueElement& probeEl, BinContext& ctx, ClustererTestResult& testResult)
 {
 	// Write it
 	ShaderProbe probe;
@@ -783,7 +789,7 @@ void LightBin::writeAndBinProbe(
 	}
 }
 
-void LightBin::writeAndBinDecal(const DecalQueueElement& decalEl, LightBinContext& ctx, ClustererTestResult& testResult)
+void LightBin::writeAndBinDecal(const DecalQueueElement& decalEl, BinContext& ctx, ClustererTestResult& testResult)
 {
 	I idx = ctx.m_decalCount.fetchAdd(1);
 	ShaderDecal& decal = ctx.m_decals[idx];

+ 16 - 11
src/anki/renderer/LightBin.h

@@ -10,9 +10,6 @@
 namespace anki
 {
 
-// Forward
-class LightBinContext;
-
 /// @addtogroup renderer
 /// @{
 
@@ -62,6 +59,17 @@ public:
 	}
 
 private:
+	class BinContext;
+	class ShaderCluster;
+	class ShaderPointLight;
+	class ShaderSpotLight;
+	class ShaderProbe;
+	class ShaderDecal;
+	class ClusterLightIndex;
+	class ClusterProbeIndex;
+	class ClusterData;
+	class WriteLightsTask;
+
 	GenericMemoryPoolAllocator<U8> m_alloc;
 	Clusterer m_clusterer;
 	U32 m_clusterCount = 0;
@@ -69,18 +77,15 @@ private:
 	StagingGpuMemoryManager* m_stagingMem = nullptr;
 	Barrier m_barrier;
 
-	void binLights(U32 threadId, PtrSize threadsCount, LightBinContext& ctx);
+	void binLights(U32 threadId, PtrSize threadsCount, BinContext& ctx);
 
-	void writeAndBinPointLight(
-		const PointLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult);
+	void writeAndBinPointLight(const PointLightQueueElement& lightEl, BinContext& ctx, ClustererTestResult& testResult);
 
-	void writeAndBinSpotLight(
-		const SpotLightQueueElement& lightEl, LightBinContext& ctx, ClustererTestResult& testResult);
+	void writeAndBinSpotLight(const SpotLightQueueElement& lightEl, BinContext& ctx, ClustererTestResult& testResult);
 
-	void writeAndBinProbe(
-		const ReflectionProbeQueueElement& probe, LightBinContext& ctx, ClustererTestResult& testResult);
+	void writeAndBinProbe(const ReflectionProbeQueueElement& probe, BinContext& ctx, ClustererTestResult& testResult);
 
-	void writeAndBinDecal(const DecalQueueElement& decal, LightBinContext& ctx, ClustererTestResult& testResult);
+	void writeAndBinDecal(const DecalQueueElement& decal, BinContext& ctx, ClustererTestResult& testResult);
 };
 /// @}
 

+ 1 - 1
src/anki/renderer/Renderer.cpp

@@ -50,7 +50,7 @@ Error Renderer::init(ThreadPool* threadpool,
 	Timestamp* globTimestamp,
 	Bool willDrawToDefaultFbo)
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDERER_INIT);
+	ANKI_TRACE_SCOPED_EVENT(R_INIT);
 
 	m_globTimestamp = globTimestamp;
 	m_threadpool = threadpool;

+ 9 - 0
src/anki/renderer/RendererObject.h

@@ -56,6 +56,15 @@ protected:
 		cmdb->drawArrays(PrimitiveTopology::TRIANGLES, 3, 1);
 	}
 
+	/// Dispatch a compute job equivalent to drawQuad
+	static void dispatchPPCompute(
+		CommandBufferPtr& cmdb, U32 workgroupSizeX, U32 workgroupSizeY, U32 outImageWidth, U32 outImageHeight)
+	{
+		const U sizeX = (outImageWidth + workgroupSizeX - 1) / workgroupSizeX;
+		const U sizeY = (outImageHeight + workgroupSizeY - 1) / workgroupSizeY;
+		cmdb->dispatchCompute(sizeX, sizeY, 1);
+	}
+
 	template<typename TPtr>
 	TPtr allocateUniforms(PtrSize size, StagingGpuMemoryToken& token)
 	{

+ 4 - 4
src/anki/renderer/ShadowMapping.cpp

@@ -143,7 +143,7 @@ Error ShadowMapping::initInternal(const ConfigSet& cfg)
 void ShadowMapping::runEsm(RenderPassWorkContext& rgraphCtx)
 {
 	ANKI_ASSERT(m_esmResolveWorkItems.getSize());
-	ANKI_TRACE_SCOPED_EVENT(RENDER_SM);
+	ANKI_TRACE_SCOPED_EVENT(R_SM);
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 
@@ -153,7 +153,7 @@ void ShadowMapping::runEsm(RenderPassWorkContext& rgraphCtx)
 
 	for(const EsmResolveWorkItem& workItem : m_esmResolveWorkItems)
 	{
-		ANKI_TRACE_INC_COUNTER(RENDERER_SHADOW_PASSES, 1);
+		ANKI_TRACE_INC_COUNTER(R_SHADOW_PASSES, 1);
 
 		cmdb->setViewport(
 			workItem.m_viewportOut[0], workItem.m_viewportOut[1], workItem.m_viewportOut[2], workItem.m_viewportOut[3]);
@@ -174,7 +174,7 @@ void ShadowMapping::runEsm(RenderPassWorkContext& rgraphCtx)
 void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 {
 	ANKI_ASSERT(m_scratchWorkItems.getSize());
-	ANKI_TRACE_SCOPED_EVENT(RENDER_SM);
+	ANKI_TRACE_SCOPED_EVENT(R_SM);
 
 	CommandBufferPtr& cmdb = rgraphCtx.m_commandBuffer;
 	const U threadIdx = rgraphCtx.m_currentSecondLevelCommandBufferIndex;
@@ -202,7 +202,7 @@ void ShadowMapping::runShadowMapping(RenderPassWorkContext& rgraphCtx)
 
 void ShadowMapping::populateRenderGraph(RenderingContext& ctx)
 {
-	ANKI_TRACE_SCOPED_EVENT(RENDER_SM);
+	ANKI_TRACE_SCOPED_EVENT(R_SM);
 
 	// First process the lights
 	U32 threadCountForScratchPass = 0;

+ 1 - 4
src/anki/renderer/Ssao.cpp

@@ -178,10 +178,7 @@ void Ssao::runBlur(RenderPassWorkContext& rgraphCtx)
 	if(m_blurUseCompute)
 	{
 		rgraphCtx.bindImage(0, 0, m_runCtx.m_rts[1], TextureSubresourceInfo());
-
-		const U sizeX = (m_width + m_workgroupSize[0] - 1) / m_workgroupSize[0];
-		const U sizeY = (m_height + m_workgroupSize[1] - 1) / m_workgroupSize[1];
-		cmdb->dispatchCompute(sizeX, sizeY, 1);
+		dispatchPPCompute(cmdb, m_workgroupSize[0], m_workgroupSize[1], m_width, m_height);
 	}
 	else
 	{

+ 2 - 1
src/anki/renderer/TemporalAA.cpp

@@ -56,7 +56,8 @@ Error TemporalAA::initInternal(const ConfigSet& config)
 		m_rtTextures[i] = m_r->createAndClearRenderTarget(m_r->create2DRenderTargetInitInfo(m_r->getWidth(),
 			m_r->getHeight(),
 			LIGHT_SHADING_COLOR_ATTACHMENT_PIXEL_FORMAT,
-			TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE,
+			TextureUsageBit::SAMPLED_FRAGMENT | TextureUsageBit::FRAMEBUFFER_ATTACHMENT_WRITE
+				| TextureUsageBit::SAMPLED_COMPUTE,
 			"TemporalAA"));
 	}
 

+ 1 - 1
src/anki/resource/AsyncLoader.cpp

@@ -125,7 +125,7 @@ Error AsyncLoader::threadWorker()
 			AsyncLoaderTaskContext ctx;
 
 			{
-				ANKI_TRACE_SCOPED_EVENT(RESOURCE_ASYNC_TASK);
+				ANKI_TRACE_SCOPED_EVENT(RSRC_ASYNC_TASK);
 				err = (*task)(ctx);
 			}
 

+ 1 - 1
src/anki/resource/MeshLoader.cpp

@@ -310,7 +310,7 @@ Error MeshLoader::storeIndicesAndPosition(DynamicArrayAuto<U32>& indices, Dynami
 		// Copy
 		for(U i = 0; i < m_header.m_totalVertexCount; ++i)
 		{
-			Vec3 vert;
+			Vec3 vert(0.0f);
 			if(attrib.m_format == Format::R32G32B32_SFLOAT)
 			{
 				vert = *reinterpret_cast<Vec3*>(&staging[i * buffInfo.m_vertexStride + attrib.m_relativeOffset]);

+ 5 - 5
src/anki/resource/ResourceFilesystem.cpp

@@ -25,25 +25,25 @@ public:
 
 	ANKI_USE_RESULT Error read(void* buff, PtrSize size) override
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 		return m_file.read(buff, size);
 	}
 
 	ANKI_USE_RESULT Error readAllText(GenericMemoryPoolAllocator<U8> alloc, String& out) override
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 		return m_file.readAllText(alloc, out);
 	}
 
 	ANKI_USE_RESULT Error readU32(U32& u) override
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 		return m_file.readU32(u);
 	}
 
 	ANKI_USE_RESULT Error readF32(F32& f) override
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 		return m_file.readF32(f);
 	}
 
@@ -128,7 +128,7 @@ public:
 
 	ANKI_USE_RESULT Error read(void* buff, PtrSize size) override
 	{
-		ANKI_TRACE_SCOPED_EVENT(RESOURCE_FILE_READ);
+		ANKI_TRACE_SCOPED_EVENT(RSRC_FILE_READ);
 
 		I64 readSize = unzReadCurrentFile(m_archive, buff, size);
 

+ 1 - 1
src/anki/resource/TransferGpuAllocator.cpp

@@ -118,7 +118,7 @@ Error TransferGpuAllocator::init(PtrSize maxSize, GrManager* gr, ResourceAllocat
 
 Error TransferGpuAllocator::allocate(PtrSize size, TransferGpuAllocatorHandle& handle)
 {
-	ANKI_TRACE_SCOPED_EVENT(RESOURCE_ALLOCATE_TRANSFER);
+	ANKI_TRACE_SCOPED_EVENT(RSRC_ALLOCATE_TRANSFER);
 
 	const PtrSize frameSize = m_maxAllocSize / FRAME_COUNT;
 

+ 30 - 28
src/anki/scene/SceneGraph.cpp

@@ -200,40 +200,43 @@ Error SceneGraph::update(Second prevUpdateTime, Second crntTime)
 	m_frameAlloc.getMemoryPool().reset();
 
 	// Delete stuff
-	ANKI_TRACE_START_EVENT(SCENE_DELETE_STUFF);
-	m_events.deleteEventsMarkedForDeletion();
-	deleteNodesMarkedForDeletion();
-	ANKI_TRACE_STOP_EVENT(SCENE_DELETE_STUFF);
+	{
+		ANKI_TRACE_SCOPED_EVENT(SCENE_MARKED_FOR_DELETION);
+		m_events.deleteEventsMarkedForDeletion();
+		deleteNodesMarkedForDeletion();
+	}
 
 	ThreadPool& threadPool = *m_threadpool;
 	(void)threadPool;
 
 	// Update
-	ANKI_TRACE_START_EVENT(SCENE_PHYSICS_UPDATE);
-	m_physics->updateAsync(crntTime - prevUpdateTime);
-	m_physics->waitUpdate();
-	ANKI_TRACE_STOP_EVENT(SCENE_PHYSICS_UPDATE);
-
-	ANKI_TRACE_START_EVENT(SCENE_NODES_UPDATE);
-	ANKI_CHECK(m_events.updateAllEvents(prevUpdateTime, crntTime));
-
-	// Then the rest
-	Array<UpdateSceneNodesTask, ThreadPool::MAX_THREADS> jobs2;
-	UpdateSceneNodesCtx updateCtx;
-	updateCtx.m_scene = this;
-	updateCtx.m_crntNode = m_nodes.getBegin();
-	updateCtx.m_prevUpdateTime = prevUpdateTime;
-	updateCtx.m_crntTime = crntTime;
-
-	for(U i = 0; i < threadPool.getThreadCount(); i++)
 	{
-		UpdateSceneNodesTask& job = jobs2[i];
-		job.m_ctx = &updateCtx;
-		threadPool.assignNewTask(i, &job);
+		ANKI_TRACE_SCOPED_EVENT(SCENE_PHYSICS_UPDATE);
+		m_physics->updateAsync(crntTime - prevUpdateTime);
+		m_physics->waitUpdate();
 	}
 
-	ANKI_CHECK(threadPool.waitForAllThreadsToFinish());
-	ANKI_TRACE_STOP_EVENT(SCENE_NODES_UPDATE);
+	{
+		ANKI_TRACE_SCOPED_EVENT(SCENE_NODES_UPDATE);
+		ANKI_CHECK(m_events.updateAllEvents(prevUpdateTime, crntTime));
+
+		// Then the rest
+		Array<UpdateSceneNodesTask, ThreadPool::MAX_THREADS> jobs2;
+		UpdateSceneNodesCtx updateCtx;
+		updateCtx.m_scene = this;
+		updateCtx.m_crntNode = m_nodes.getBegin();
+		updateCtx.m_prevUpdateTime = prevUpdateTime;
+		updateCtx.m_crntTime = crntTime;
+
+		for(U i = 0; i < threadPool.getThreadCount(); i++)
+		{
+			UpdateSceneNodesTask& job = jobs2[i];
+			job.m_ctx = &updateCtx;
+			threadPool.assignNewTask(i, &job);
+		}
+
+		ANKI_CHECK(threadPool.waitForAllThreadsToFinish());
+	}
 
 	m_stats.m_updateTime = HighRezTimer::getCurrentTime() - m_stats.m_updateTime;
 	return Error::NONE;
@@ -279,7 +282,7 @@ Error SceneGraph::updateNode(Second prevTime, Second crntTime, SceneNode& node)
 
 Error SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx) const
 {
-	ANKI_TRACE_START_EVENT(SCENE_NODES_UPDATE);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_NODES_UPDATE);
 
 	IntrusiveList<SceneNode>::Iterator& it = ctx.m_crntNode;
 	IntrusiveList<SceneNode>::ConstIterator end = m_nodes.getEnd();
@@ -324,7 +327,6 @@ Error SceneGraph::updateNodes(UpdateSceneNodesCtx& ctx) const
 		}
 	}
 
-	ANKI_TRACE_STOP_EVENT(SCENE_NODES_UPDATE);
 	return err;
 }
 

+ 1 - 2
src/anki/scene/SoftwareRasterizer.cpp

@@ -300,9 +300,8 @@ void SoftwareRasterizer::rasterizeTriangle(const Vec4* tri)
 
 Bool SoftwareRasterizer::visibilityTest(const CollisionShape& cs, const Aabb& aabb) const
 {
-	ANKI_TRACE_START_EVENT(SCENE_RASTERIZER_TEST);
+	ANKI_TRACE_SCOPED_EVENT(SCENE_RASTERIZER_TEST);
 	Bool inside = visibilityTestInternal(cs, aabb);
-	ANKI_TRACE_STOP_EVENT(SCENE_RASTERIZER_TEST);
 
 	return inside;
 }

+ 1 - 2
src/anki/scene/VisibilityInternal.h

@@ -195,11 +195,10 @@ public:
 private:
 	void gather()
 	{
-		ANKI_TRACE_START_EVENT(SCENE_VISIBILITY_ITERATE_SECTORS);
+		ANKI_TRACE_SCOPED_EVENT(SCENE_VISIBILITY_ITERATE_SECTORS);
 		U testIdx = m_visCtx->m_testsCount.fetchAdd(1);
 
 		m_visCtx->m_scene->getSectorGroup().findVisibleNodes(*m_frc, testIdx, m_r, m_sectorsCtx);
-		ANKI_TRACE_STOP_EVENT(SCENE_VISIBILITY_ITERATE_SECTORS);
 	}
 };
 

+ 19 - 19
src/anki/util/String.h

@@ -118,12 +118,12 @@ public:
 	}
 
 	/// Return true if the string is not initialized or has zero length.
-	Bool isEmpty() const
+	bool isEmpty() const
 	{
 		return m_ptr == nullptr || getLength() == 0;
 	}
 
-	Bool operator==(const CString& b) const
+	bool operator==(const CString& b) const
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
@@ -135,12 +135,12 @@ public:
 		}
 	}
 
-	Bool operator!=(const CString& b) const
+	bool operator!=(const CString& b) const
 	{
 		return !((*this) == b);
 	}
 
-	Bool operator<(const CString& b) const
+	bool operator<(const CString& b) const
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
@@ -152,7 +152,7 @@ public:
 		}
 	}
 
-	Bool operator<=(const CString& b) const
+	bool operator<=(const CString& b) const
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
@@ -164,7 +164,7 @@ public:
 		}
 	}
 
-	Bool operator>(const CString& b) const
+	bool operator>(const CString& b) const
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
@@ -176,7 +176,7 @@ public:
 		}
 	}
 
-	Bool operator>=(const CString& b) const
+	bool operator>=(const CString& b) const
 	{
 		if(m_ptr == nullptr || b.m_ptr == nullptr)
 		{
@@ -375,7 +375,7 @@ public:
 	}
 
 	/// Return true if strings are equal
-	Bool operator==(const String& b) const
+	bool operator==(const String& b) const
 	{
 		checkInit();
 		b.checkInit();
@@ -383,13 +383,13 @@ public:
 	}
 
 	/// Return true if strings are not equal
-	Bool operator!=(const String& b) const
+	bool operator!=(const String& b) const
 	{
 		return !(*this == b);
 	}
 
 	/// Return true if this is less than b
-	Bool operator<(const String& b) const
+	bool operator<(const String& b) const
 	{
 		checkInit();
 		b.checkInit();
@@ -397,7 +397,7 @@ public:
 	}
 
 	/// Return true if this is less or equal to b
-	Bool operator<=(const String& b) const
+	bool operator<=(const String& b) const
 	{
 		checkInit();
 		b.checkInit();
@@ -405,7 +405,7 @@ public:
 	}
 
 	/// Return true if this is greater than b
-	Bool operator>(const String& b) const
+	bool operator>(const String& b) const
 	{
 		checkInit();
 		b.checkInit();
@@ -413,7 +413,7 @@ public:
 	}
 
 	/// Return true if this is greater or equal to b
-	Bool operator>=(const String& b) const
+	bool operator>=(const String& b) const
 	{
 		checkInit();
 		b.checkInit();
@@ -421,41 +421,41 @@ public:
 	}
 
 	/// Return true if strings are equal
-	Bool operator==(const CStringType& cstr) const
+	bool operator==(const CStringType& cstr) const
 	{
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) == 0;
 	}
 
 	/// Return true if strings are not equal
-	Bool operator!=(const CStringType& cstr) const
+	bool operator!=(const CStringType& cstr) const
 	{
 		return !(*this == cstr);
 	}
 
 	/// Return true if this is less than cstr.
-	Bool operator<(const CStringType& cstr) const
+	bool operator<(const CStringType& cstr) const
 	{
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) < 0;
 	}
 
 	/// Return true if this is less or equal to cstr.
-	Bool operator<=(const CStringType& cstr) const
+	bool operator<=(const CStringType& cstr) const
 	{
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) <= 0;
 	}
 
 	/// Return true if this is greater than cstr.
-	Bool operator>(const CStringType& cstr) const
+	bool operator>(const CStringType& cstr) const
 	{
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) > 0;
 	}
 
 	/// Return true if this is greater or equal to cstr.
-	Bool operator>=(const CStringType& cstr) const
+	bool operator>=(const CStringType& cstr) const
 	{
 		checkInit();
 		return std::strcmp(&m_data[0], cstr.get()) >= 0;

+ 298 - 131
src/anki/util/Tracer.cpp

@@ -10,20 +10,88 @@
 namespace anki
 {
 
-thread_local Tracer::ThreadLocal Tracer::m_threadLocal;
+/// Lightweight event storage.
+class Tracer::Event
+{
+public:
+	const char* m_name;
+	Second m_timestamp;
+	Second m_duration;
+};
+
+/// Event batch allocation.
+class Tracer::EventsChunk : public IntrusiveListEnabled<EventsChunk>
+{
+public:
+	Array<Event, EVENTS_PER_CHUNK> m_events;
+	U32 m_eventCount = 0;
+};
+
+/// A heavyweight event with more info.
+class Tracer::GatherEvent
+{
+public:
+	CString m_name;
+	Second m_timestamp;
+	Second m_duration;
+	ThreadId m_tid;
+};
+
+/// Lightweight counter storage.
+class Tracer::Counter
+{
+public:
+	const char* m_name;
+	U64 m_value;
+};
+
+/// Counter batch allocation.
+class Tracer::CountersChunk : public IntrusiveListEnabled<CountersChunk>
+{
+public:
+	U64 m_frame;
+	Second m_startFrameTime;
+	Array<Counter, COUNTERS_PER_CHUNK> m_counters;
+	U32 m_counterCount = 0;
+};
+
+/// Heavyweight counter storage.
+class Tracer::GatherCounter
+{
+public:
+	CString m_name;
+	U64 m_value;
+};
+
+/// Thread local storage.
+class Tracer::ThreadLocal
+{
+public:
+	ThreadId m_tid ANKI_DBG_NULLIFY;
+
+	IntrusiveList<CountersChunk> m_counterChunks;
+	IntrusiveList<EventsChunk> m_eventChunks;
+};
+
+thread_local Tracer::ThreadLocal* Tracer::m_threadLocal = nullptr;
 
+/// Storage of counters per frame.
 class Tracer::PerFrameCounters
 {
 public:
-	DynamicArrayAuto<Counter> m_counters;
-	U32 m_frameIdx;
+	DynamicArrayAuto<GatherCounter> m_counters;
+	DynamicArrayAuto<GatherCounter> m_tempCounters; ///< A temp storage.
+	U64 m_frame;
+	Second m_startFrameTime;
 
 	PerFrameCounters(GenericMemoryPoolAllocator<U8> alloc)
 		: m_counters(alloc)
+		, m_tempCounters(alloc)
 	{
 	}
 };
 
+/// Context for Tracer::flush().
 class Tracer::FlushCtx
 {
 public:
@@ -31,7 +99,7 @@ public:
 	CString m_filename;
 	DynamicArrayAuto<CString> m_counterNames;
 	DynamicArrayAuto<PerFrameCounters> m_counters;
-	DynamicArrayAuto<Event> m_events;
+	DynamicArrayAuto<GatherEvent> m_events;
 
 	FlushCtx(GenericMemoryPoolAllocator<U8> alloc, const CString& filename)
 		: m_alloc(alloc)
@@ -47,75 +115,82 @@ Tracer::~Tracer()
 {
 	for(ThreadLocal* threadLocal : m_allThreadLocal)
 	{
-		while(!threadLocal->m_counters.isEmpty())
+		while(!threadLocal->m_counterChunks.isEmpty())
 		{
-			Counter& counter = threadLocal->m_counters.getFront();
-			threadLocal->m_counters.popFront();
-			threadLocal->m_counterAlloc.deleteInstance(m_alloc, &counter);
+			CountersChunk& chunk = threadLocal->m_counterChunks.getFront();
+			threadLocal->m_counterChunks.popFront();
+			m_alloc.deleteInstance(&chunk);
 		}
 
-		while(!threadLocal->m_events.isEmpty())
+		while(!threadLocal->m_eventChunks.isEmpty())
 		{
-			Event& event = threadLocal->m_events.getFront();
-			threadLocal->m_events.popFront();
-			threadLocal->m_eventAlloc.deleteInstance(m_alloc, &event);
+			EventsChunk& chunk = threadLocal->m_eventChunks.getFront();
+			threadLocal->m_eventChunks.popFront();
+			m_alloc.deleteInstance(&chunk);
 		}
+
+		m_alloc.deleteInstance(threadLocal);
 	}
 
 	m_allThreadLocal.destroy(m_alloc);
-	m_frames.destroy(m_alloc);
 }
 
 void Tracer::newFrame(U64 frame)
 {
-#if ANKI_ASSERTS_ENABLED
-	if(m_frames.getSize() > 0)
-	{
-		ANKI_ASSERT(frame > m_frames.getBack().m_frame);
-	}
-#endif
+	ANKI_ASSERT(frame == 0 || frame > m_frame);
+
+	LockGuard<SpinLock> lock(m_frameMtx);
 
-	Frame f;
-	f.m_startFrameTime = HighRezTimer::getCurrentTime();
-	f.m_frame = frame;
-	m_frames.emplaceBack(m_alloc, f);
+	m_startFrameTime = HighRezTimer::getCurrentTime();
+	m_frame = frame;
 }
 
 Tracer::ThreadLocal& Tracer::getThreadLocal()
 {
-	ThreadLocal& out = m_threadLocal;
-	if(ANKI_UNLIKELY(!out.m_tracerKnowsAboutThis))
+	ThreadLocal* out = m_threadLocal;
+	if(ANKI_UNLIKELY(out == nullptr))
 	{
+		out = m_alloc.newInstance<ThreadLocal>();
+		out->m_tid = Thread::getCurrentThreadId();
+		m_threadLocal = out;
+
+		// Store it
 		LockGuard<Mutex> lock(m_threadLocalMtx);
-		m_allThreadLocal.emplaceBack(m_alloc, &out);
-		out.m_tid = Thread::getCurrentThreadId();
-		out.m_tracerKnowsAboutThis = true;
+		m_allThreadLocal.emplaceBack(m_alloc, out);
 	}
 
-	return out;
+	return *out;
 }
 
-void Tracer::beginEvent()
+TracerEventHandle Tracer::beginEvent()
 {
 	ThreadLocal& threadLocal = getThreadLocal();
-	Event* event = threadLocal.m_eventAlloc.newInstance(m_alloc);
+
+	// Allocate new chunk
+	if(threadLocal.m_eventChunks.isEmpty() || threadLocal.m_eventChunks.getBack().m_eventCount >= EVENTS_PER_CHUNK)
+	{
+		EventsChunk* chunk = m_alloc.newInstance<EventsChunk>();
+		threadLocal.m_eventChunks.pushBack(chunk);
+	}
+
+	EventsChunk& chunk = threadLocal.m_eventChunks.getBack();
+	Event* event = &chunk.m_events[chunk.m_eventCount++];
 	event->m_timestamp = HighRezTimer::getCurrentTime();
-	threadLocal.m_events.pushBack(event);
+
+	return event;
 }
 
-void Tracer::endEvent(const char* eventName)
+void Tracer::endEvent(const char* eventName, TracerEventHandle eventHandle)
 {
 	ANKI_ASSERT(eventName);
+	ANKI_ASSERT(eventHandle);
 
-	// Set the time in the event
-	ThreadLocal& threadLocal = getThreadLocal();
-	ANKI_ASSERT(!threadLocal.m_events.isEmpty());
-	Event& event = threadLocal.m_events.getBack();
-	event.m_name = eventName;
-	event.m_duration = HighRezTimer::getCurrentTime() - event.m_timestamp;
+	Event* event = static_cast<Event*>(eventHandle);
+	event->m_name = eventName;
+	event->m_duration = HighRezTimer::getCurrentTime() - event->m_timestamp;
 
 	// Store a counter as well. In ns
-	increaseCounter(eventName, U64(event.m_duration * 1000000000.0));
+	increaseCounter(eventName, U64(event->m_duration * 1000000000.0));
 }
 
 void Tracer::increaseCounter(const char* counterName, U64 value)
@@ -123,103 +198,140 @@ void Tracer::increaseCounter(const char* counterName, U64 value)
 	ANKI_ASSERT(counterName);
 
 	ThreadLocal& threadLocal = getThreadLocal();
-	Counter* counter = threadLocal.m_counterAlloc.newInstance(m_alloc);
-	counter->m_name = counterName;
-	counter->m_value = value;
-	counter->m_frameIdx = m_frames.getSize() - 1;
 
-	threadLocal.m_counters.pushBack(counter);
+	// Create chunk
+	if(threadLocal.m_counterChunks.isEmpty() || threadLocal.m_counterChunks.getBack().m_frame != m_frame
+		|| threadLocal.m_counterChunks.getBack().m_counterCount >= COUNTERS_PER_CHUNK)
+	{
+		CountersChunk* newChunk = m_alloc.newInstance<CountersChunk>();
+		threadLocal.m_counterChunks.pushBack(newChunk);
+
+		{
+			LockGuard<SpinLock> lock(m_frameMtx);
+			newChunk->m_frame = m_frame;
+			newChunk->m_startFrameTime = m_startFrameTime;
+		}
+	}
+
+	CountersChunk& chunk = threadLocal.m_counterChunks.getBack();
+
+	Counter& counter = chunk.m_counters[chunk.m_counterCount++];
+	counter.m_name = counterName;
+	counter.m_value = value;
 }
 
 void Tracer::gatherCounters(FlushCtx& ctx)
 {
-	// Gather all the counters
-	DynamicArrayAuto<Counter> allCounters(m_alloc);
+	// Iterate all the chunks and create the PerFrameCounters
 	for(ThreadLocal* threadLocal : m_allThreadLocal)
 	{
-		while(!threadLocal->m_counters.isEmpty())
+		while(!threadLocal->m_counterChunks.isEmpty())
 		{
-			// Pop counter
-			Counter& inCounter = threadLocal->m_counters.getFront();
-			threadLocal->m_counters.popFront();
+			// Pop chunk
+			CountersChunk& chunk = threadLocal->m_counterChunks.getFront();
+			threadLocal->m_counterChunks.popFront();
 
-			// Copy
-			Counter newCounter = inCounter;
-			allCounters.emplaceBack(newCounter);
+			// Iterate the PerFrameCounters to find if the frame is present
+			PerFrameCounters* perFrame = nullptr;
+			for(PerFrameCounters& pf : ctx.m_counters)
+			{
+				if(pf.m_frame == chunk.m_frame)
+				{
+					perFrame = &pf;
+					break;
+				}
+			}
+
+			if(!perFrame)
+			{
+				ctx.m_counters.emplaceBack(m_alloc);
+
+				perFrame = &ctx.m_counters.getBack();
+				perFrame->m_frame = chunk.m_frame;
+				perFrame->m_startFrameTime = chunk.m_startFrameTime;
+			}
+
+			ANKI_ASSERT(chunk.m_frame == perFrame->m_frame);
 
-			// Delete poped counter
-			threadLocal->m_counterAlloc.deleteInstance(m_alloc, &inCounter);
+			// Copy the counters
+			for(U i = 0; i < chunk.m_counterCount; ++i)
+			{
+				const Counter& inCounter = chunk.m_counters[i];
+
+				GatherCounter outCounter;
+				outCounter.m_name = inCounter.m_name;
+				outCounter.m_value = inCounter.m_value;
+
+				perFrame->m_tempCounters.emplaceBack(outCounter);
+			}
+
+			// Delete chunk
+			m_alloc.deleteInstance(&chunk);
 		}
 	}
 
-	if(allCounters.getSize() == 0)
+	if(ctx.m_counters.getSize() == 0)
 	{
 		// Early exit
 		return;
 	}
 
-	// Sort them
-	std::sort(allCounters.getBegin(), allCounters.getEnd(), [](const Counter& a, const Counter& b) {
-		if(a.m_frameIdx != b.m_frameIdx)
-		{
-			return a.m_frameIdx < b.m_frameIdx;
-		}
-
-		ANKI_ASSERT(a.m_name && b.m_name);
-		return a.m_name < b.m_name;
-	});
-
-	// Compact them
-	for(U i = 0; i < allCounters.getSize(); ++i)
+	// Compact the counters and get all counter names
+	for(PerFrameCounters& perFrame : ctx.m_counters)
 	{
-		const Counter& inCounter = allCounters[i];
-
-		// Create new frame
-		if(ctx.m_counters.getSize() == 0 || ctx.m_counters.getBack().m_frameIdx != inCounter.m_frameIdx)
+		if(perFrame.m_tempCounters.getSize() == 0)
 		{
-			ctx.m_counters.emplaceBack(m_alloc);
-			ctx.m_counters.getBack().m_frameIdx = inCounter.m_frameIdx;
+			continue;
 		}
 
-		PerFrameCounters& crntFrame = ctx.m_counters.getBack();
+		// Sort counters
+		std::sort(perFrame.m_tempCounters.getBegin(),
+			perFrame.m_tempCounters.getEnd(),
+			[](const GatherCounter& a, const GatherCounter& b) { return a.m_name < b.m_name; });
 
-		// Check if we have a new counter
-		if(crntFrame.m_counters.getSize() == 0 || CString(crntFrame.m_counters.getBack().m_name) != inCounter.m_name)
+		// Compact counters
+		for(const GatherCounter& tmpCounter : perFrame.m_tempCounters)
 		{
-			// Create new counter
-			crntFrame.m_counters.emplaceBack(inCounter);
-
-			// Update the counter names
-			Bool found = false;
-			for(const CString& counterName : ctx.m_counterNames)
+			if(perFrame.m_counters.getSize() == 0 || perFrame.m_counters.getBack().m_name != tmpCounter.m_name)
 			{
-				if(counterName == inCounter.m_name)
+				// Create new counter
+				perFrame.m_counters.emplaceBack(tmpCounter);
+
+				// Update the counter names
+				Bool found = false;
+				for(const CString& counterName : ctx.m_counterNames)
 				{
-					found = true;
-					break;
+					if(counterName == tmpCounter.m_name)
+					{
+						found = true;
+						break;
+					}
 				}
-			}
 
-			if(!found)
+				if(!found)
+				{
+					ctx.m_counterNames.emplaceBack(tmpCounter.m_name);
+				}
+			}
+			else
 			{
-				ctx.m_counterNames.emplaceBack(CString(inCounter.m_name));
+				// Merge counters
+				GatherCounter& mergeTo = perFrame.m_counters.getBack();
+				ANKI_ASSERT(mergeTo.m_name == tmpCounter.m_name);
+				mergeTo.m_value += tmpCounter.m_value;
 			}
 		}
-		else
-		{
-			// Merge counters
-			Counter& mergeTo = crntFrame.m_counters.getBack();
-			ANKI_ASSERT(CString(mergeTo.m_name) == inCounter.m_name);
-			ANKI_ASSERT(mergeTo.m_frameIdx == inCounter.m_frameIdx);
-			mergeTo.m_value += inCounter.m_value;
-		}
+
+		// Free some memory
+		perFrame.m_tempCounters.destroy();
 	}
 
 	// Sort the counter names
 	ANKI_ASSERT(ctx.m_counterNames.getSize() > 0);
 	std::sort(ctx.m_counterNames.getBegin(), ctx.m_counterNames.getEnd(), [](CString a, CString b) { return a < b; });
 
-	// Fill the gaps. Some counters might have not appeared in some frames
+	// Fill the gaps. Some counters might have not appeared in some frames. Those counters need to have a zero value
+	// because the CSV wants all counters present on all rows
 	for(PerFrameCounters& perFrame : ctx.m_counters)
 	{
 		ANKI_ASSERT(perFrame.m_counters.getSize() <= ctx.m_counterNames.getSize());
@@ -230,7 +342,7 @@ void Tracer::gatherCounters(FlushCtx& ctx)
 
 			// Try to find the counter
 			Bool found = false;
-			for(const Counter& c : perFrame.m_counters)
+			for(const GatherCounter& c : perFrame.m_counters)
 			{
 				if(counterName == c.m_name)
 				{
@@ -242,18 +354,17 @@ void Tracer::gatherCounters(FlushCtx& ctx)
 			if(!found)
 			{
 				// Counter is missing
-				Counter missingCounter;
-				missingCounter.m_frameIdx = perFrame.m_frameIdx;
-				missingCounter.m_name = counterName.cstr();
+				GatherCounter missingCounter;
+				missingCounter.m_name = counterName;
 				missingCounter.m_value = 0;
 				perFrame.m_counters.emplaceBack(missingCounter);
 			}
 		}
 
-		std::sort(perFrame.m_counters.getBegin(), perFrame.m_counters.getEnd(), [](const Counter& a, const Counter& b) {
-			ANKI_ASSERT(a.m_name && b.m_name);
-			return CString(a.m_name) < CString(b.m_name);
-		});
+		// Sort again
+		std::sort(perFrame.m_counters.getBegin(),
+			perFrame.m_counters.getEnd(),
+			[](const GatherCounter& a, const GatherCounter& b) { return a.m_name < b.m_name; });
 
 		ANKI_ASSERT(perFrame.m_counters.getSize() == ctx.m_counterNames.getSize());
 	}
@@ -263,25 +374,44 @@ void Tracer::gatherEvents(FlushCtx& ctx)
 {
 	for(ThreadLocal* threadLocal : m_allThreadLocal)
 	{
-		while(!threadLocal->m_events.isEmpty())
+		while(!threadLocal->m_eventChunks.isEmpty())
 		{
-			// Pop event
-			Event& inEvent = threadLocal->m_events.getFront();
-			threadLocal->m_events.popFront();
+			// Pop chunk
+			EventsChunk& chunk = threadLocal->m_eventChunks.getFront();
+			threadLocal->m_eventChunks.popFront();
 
 			// Copy
-			Event newEvent = inEvent;
-			newEvent.m_tid = threadLocal->m_tid;
-			ctx.m_events.emplaceBack(newEvent);
+			for(U i = 0; i < chunk.m_eventCount; ++i)
+			{
+				const Event& inEvent = chunk.m_events[i];
+
+				GatherEvent outEvent;
+				outEvent.m_duration = inEvent.m_duration;
+				outEvent.m_name = inEvent.m_name;
+				outEvent.m_timestamp = inEvent.m_timestamp;
+				outEvent.m_tid = threadLocal->m_tid;
+
+				ctx.m_events.emplaceBack(outEvent);
+			}
 
-			// Delete poped event
-			threadLocal->m_eventAlloc.deleteInstance(m_alloc, &inEvent);
+			// Delete popped chunk
+			m_alloc.deleteInstance(&chunk);
 		}
 	}
 
 	// Sort them
-	std::sort(ctx.m_events.getBegin(), ctx.m_events.getEnd(), [](const Event& a, const Event& b) {
-		return a.m_timestamp < b.m_timestamp;
+	std::sort(ctx.m_events.getBegin(), ctx.m_events.getEnd(), [](const GatherEvent& a, const GatherEvent& b) {
+		if(a.m_timestamp != b.m_timestamp)
+		{
+			return a.m_timestamp < b.m_timestamp;
+		}
+
+		if(a.m_duration != b.m_duration)
+		{
+			return a.m_duration < b.m_duration;
+		}
+
+		return a.m_name < b.m_name;
 	});
 }
 
@@ -289,7 +419,7 @@ Error Tracer::writeTraceJson(const FlushCtx& ctx)
 {
 	// Open the file
 	StringAuto newFname(m_alloc);
-	newFname.sprintf("%s_trace.json", ctx.m_filename.cstr());
+	newFname.sprintf("%s.trace.json", ctx.m_filename.cstr());
 	File file;
 	ANKI_CHECK(file.open(newFname.toCString(), FileOpenFlag::WRITE));
 
@@ -302,7 +432,7 @@ Error Tracer::writeTraceJson(const FlushCtx& ctx)
 	ANKI_CHECK(file.writeText("[\n"));
 
 	// Write the events to the file
-	for(const Event& event : ctx.m_events)
+	for(const GatherEvent& event : ctx.m_events)
 	{
 		const U64 startMicroSec = U64(event.m_timestamp * 1000000.0);
 		const U64 durMicroSec = U64(event.m_duration * 1000000.0);
@@ -314,7 +444,7 @@ Error Tracer::writeTraceJson(const FlushCtx& ctx)
 
 		ANKI_CHECK(file.writeText("{\"name\": \"%s\", \"cat\": \"PERF\", \"ph\": \"X\", "
 								  "\"pid\": 1, \"tid\": %llu, \"ts\": %llu, \"dur\": %llu},\n",
-			event.m_name,
+			event.m_name.cstr(),
 			event.m_tid,
 			startMicroSec,
 			durMicroSec));
@@ -324,19 +454,19 @@ Error Tracer::writeTraceJson(const FlushCtx& ctx)
 	for(U i = 0; i < ctx.m_counters.getSize(); ++i)
 	{
 		const PerFrameCounters& frame = ctx.m_counters[i];
-		const Second startFrameTime = m_frames[frame.m_frameIdx].m_startFrameTime;
+		const Second startFrameTime = frame.m_startFrameTime;
 
-		// TODO
+		// The counters need a range in order to appear. Add a dummy counter for the last frame
 		const Array<Second, 2> timestamps = {{startFrameTime, startFrameTime + 1.0}};
 		const U timestampCount = (i < ctx.m_counters.getSize() - 1) ? 1 : 2;
 
-		for(const Counter& counter : frame.m_counters)
+		for(const GatherCounter& counter : frame.m_counters)
 		{
 			for(U j = 0; j < timestampCount; ++j)
 			{
 				ANKI_CHECK(file.writeText("{\"name\": \"%s\", \"cat\": \"PERF\", \"ph\": \"C\", "
 										  "\"pid\": 1, \"ts\": %llu, \"args\": {\"val\": %llu}},\n",
-					counter.m_name,
+					counter.m_name.cstr(),
 					U64(timestamps[j] * 1000000.0),
 					counter.m_value));
 			}
@@ -352,7 +482,7 @@ Error Tracer::writeCounterCsv(const FlushCtx& ctx)
 {
 	// Open the file
 	StringAuto fname(m_alloc);
-	fname.sprintf("%s_counters.csv", ctx.m_filename.cstr());
+	fname.sprintf("%s.counters.csv", ctx.m_filename.cstr());
 	File file;
 	ANKI_CHECK(file.open(fname.toCString(), FileOpenFlag::WRITE));
 
@@ -371,16 +501,36 @@ Error Tracer::writeCounterCsv(const FlushCtx& ctx)
 	ANKI_CHECK(file.writeText("\n"));
 
 	// Dump the frames
+	U rowCount = 0;
 	for(const PerFrameCounters& frame : ctx.m_counters)
 	{
-		ANKI_CHECK(file.writeText("%llu", m_frames[frame.m_frameIdx].m_frame));
+		ANKI_CHECK(file.writeText("%llu", frame.m_frame));
 
-		for(const Counter& c : frame.m_counters)
+		for(const GatherCounter& c : frame.m_counters)
 		{
 			ANKI_CHECK(file.writeText(",%llu", c.m_value));
 		}
 
 		ANKI_CHECK(file.writeText("\n"));
+		++rowCount;
+	}
+
+	// Dump some spreadsheet functions
+	ANKI_CHECK(file.writeText("SUM"));
+	for(U i = 0; i < ctx.m_counterNames.getSize(); ++i)
+	{
+		Array<char, 3> columnName;
+		getSpreadsheetColumnName(i + 1, columnName);
+		ANKI_CHECK(file.writeText(",=SUM(%s2:%s%u)", &columnName[0], &columnName[0], rowCount + 1u));
+	}
+	ANKI_CHECK(file.writeText("\n"));
+
+	ANKI_CHECK(file.writeText("AVG"));
+	for(U i = 0; i < ctx.m_counterNames.getSize(); ++i)
+	{
+		Array<char, 3> columnName;
+		getSpreadsheetColumnName(i + 1, columnName);
+		ANKI_CHECK(file.writeText(",=AVERAGE(%s2:%s%u)", &columnName[0], &columnName[0], rowCount + 1u));
 	}
 
 	return Error::NONE;
@@ -396,9 +546,26 @@ Error Tracer::flush(CString filename)
 	ANKI_CHECK(writeTraceJson(ctx));
 	ANKI_CHECK(writeCounterCsv(ctx));
 
-	m_frames.destroy(m_alloc);
-
 	return Error::NONE;
 }
 
+/// Write the spreadsheet-style letter name of a column into @a arr as a null-terminated string.
+/// Column 0 maps to "A", 25 to "Z", 26 to "AA" and so on.
+/// @param column Zero-based column index.
+/// @param[out] arr Receives one or two letters followed by the terminator(s).
+/// NOTE(review): only one- and two-letter names fit in @a arr, so indices above 701 ("ZZ") would put a
+/// non-letter character in the first slot. Confirm callers never exceed that.
+void Tracer::getSpreadsheetColumnName(U column, Array<char, 3>& arr)
+{
+	const U prefixIdx = column / 26; // Zero means that the name has a single letter
+	const U letterIdx = column % 26;
+	const char lastLetter = static_cast<char>('A' + letterIdx);
+
+	if(prefixIdx == 0)
+	{
+		// Single letter name, terminate right after it
+		arr[0] = lastLetter;
+		arr[1] = '\0';
+	}
+	else
+	{
+		// Two letter name, the prefix letter is one-based ("AA" follows "Z")
+		arr[0] = static_cast<char>('A' + (prefixIdx - 1));
+		arr[1] = lastLetter;
+	}
+
+	// The last slot is always a terminator
+	arr[2] = '\0';
+}
+
 } // end namespace anki

+ 28 - 45
src/anki/util/Tracer.h

@@ -16,6 +16,9 @@ namespace anki
 /// @addtogroup util_other
 /// @{
 
+/// @memberof Tracer
+using TracerEventHandle = void*;
+
 /// Tracer.
 class Tracer : public NonCopyable
 {
@@ -37,10 +40,10 @@ public:
 	}
 
 	/// Begin a new event.
-	void beginEvent();
+	ANKI_USE_RESULT TracerEventHandle beginEvent();
 
 	/// End the event that got started with beginEvent().
-	void endEvent(const char* eventName);
+	void endEvent(const char* eventName, TracerEventHandle event);
 
 	/// Increase a counter.
 	void increaseCounter(const char* counterName, U64 value);
@@ -52,54 +55,31 @@ public:
 	ANKI_USE_RESULT Error flush(CString filename);
 
 private:
-	GenericMemoryPoolAllocator<U8> m_alloc;
+	static const U32 EVENTS_PER_CHUNK = 256;
+	static const U32 COUNTERS_PER_CHUNK = 512;
 
-	class Frame
-	{
-	public:
-		U64 m_frame;
-		Second m_startFrameTime; ///< When the frame started
-	};
+	class Event;
+	class EventsChunk;
+	class GatherEvent;
 
-	DynamicArray<Frame> m_frames;
+	class Counter;
+	class CountersChunk;
+	class GatherCounter;
 
-	/// Event.
-	class Event : public IntrusiveListEnabled<Event>
-	{
-	public:
-		const char* m_name ANKI_DBG_NULLIFY;
-		Second m_timestamp ANKI_DBG_NULLIFY;
-		Second m_duration ANKI_DBG_NULLIFY;
-		ThreadId m_tid ANKI_DBG_NULLIFY;
-	};
-
-	/// Counter.
-	class Counter : public IntrusiveListEnabled<Counter>
-	{
-	public:
-		const char* m_name ANKI_DBG_NULLIFY;
-		U64 m_value ANKI_DBG_NULLIFY;
-		U32 m_frameIdx ANKI_DBG_NULLIFY;
-	};
+	class ThreadLocal;
+	class PerFrameCounters;
+	class FlushCtx;
 
-	class ThreadLocal
-	{
-	public:
-		ThreadId m_tid ANKI_DBG_NULLIFY;
-		ObjectAllocatorSameType<Event> m_eventAlloc;
-		ObjectAllocatorSameType<Counter> m_counterAlloc;
-		IntrusiveList<Event> m_events;
-		IntrusiveList<Counter> m_counters;
-		Bool m_tracerKnowsAboutThis = false;
-	};
-
-	static thread_local ThreadLocal m_threadLocal;
+	GenericMemoryPoolAllocator<U8> m_alloc;
+
+	Second m_startFrameTime = 0.0;
+	U64 m_frame = 0;
+	SpinLock m_frameMtx; ///< Protect m_startFrameTime and m_frame.
+
+	static thread_local ThreadLocal* m_threadLocal;
 	DynamicArray<ThreadLocal*> m_allThreadLocal; ///< The Tracer should know about all the ThreadLocal.
 	Mutex m_threadLocalMtx;
 
-	class FlushCtx;
-	class PerFrameCounters;
-
 	/// Get the thread local ThreadLocal structure.
 	ThreadLocal& getThreadLocal();
 
@@ -114,6 +94,8 @@ private:
 
 	/// Dump the events and the counters to a chrome trace file.
 	Error writeTraceJson(const FlushCtx& ctx);
+
+	static void getSpreadsheetColumnName(U column, Array<char, 3>& arr);
 };
 
 /// Tracer singleton.
@@ -127,16 +109,17 @@ public:
 		: m_name(name)
 		, m_tracer(&TracerSingleton::get())
 	{
-		m_tracer->beginEvent();
+		m_handle = m_tracer->beginEvent();
 	}
 
 	~TraceScopedEvent()
 	{
-		m_tracer->endEvent(m_name);
+		m_tracer->endEvent(m_name, m_handle);
 	}
 
 private:
 	const char* m_name;
+	TracerEventHandle m_handle;
 	Tracer* m_tracer;
 };
 /// @}

+ 8 - 8
tests/util/Tracer.cpp

@@ -21,25 +21,25 @@ ANKI_TEST(Util, Tracer)
 	// 2 same events
 	tracer.newFrame(1);
 
-	tracer.beginEvent();
+	auto handle0 = tracer.beginEvent();
 	HighRezTimer::sleep(0.5);
-	tracer.endEvent("event");
+	tracer.endEvent("event", handle0);
 
-	tracer.beginEvent();
+	auto handle1 = tracer.beginEvent();
 	HighRezTimer::sleep(0.5);
-	tracer.endEvent("event");
+	tracer.endEvent("event", handle1);
 
 	// 4th frame
 	// 2 different events & non zero counter
 	tracer.newFrame(3);
 
-	tracer.beginEvent();
+	auto handle2 = tracer.beginEvent();
 	HighRezTimer::sleep(0.5);
-	tracer.endEvent("event");
+	tracer.endEvent("event", handle2);
 
-	tracer.beginEvent();
+	auto handle3 = tracer.beginEvent();
 	HighRezTimer::sleep(0.5);
-	tracer.endEvent("event2");
+	tracer.endEvent("event2", handle3);
 
 	tracer.increaseCounter("counter", 100);