Browse Source

Add HWCPipe support

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
a72f90cf7a

+ 42 - 177
AnKi/Core/App.cpp

@@ -14,7 +14,9 @@
 #include <AnKi/Util/HighRezTimer.h>
 #include <AnKi/Core/CoreTracer.h>
 #include <AnKi/Core/DeveloperConsole.h>
+#include <AnKi/Core/StatsUi.h>
 #include <AnKi/Core/NativeWindow.h>
+#include <AnKi/Core/MaliHwCounters.h>
 #include <AnKi/Input/Input.h>
 #include <AnKi/Scene/SceneGraph.h>
 #include <AnKi/Renderer/RenderQueue.h>
@@ -40,166 +42,6 @@ namespace anki {
 android_app* g_androidApp = nullptr;
 #endif
 
-class App::StatsUi
-{
-public:
-	template<typename T>
-	class BufferedValue
-	{
-	public:
-		void set(T x)
-		{
-			m_total += x;
-			++m_count;
-		}
-
-		F64 get(Bool flush)
-		{
-			if(flush)
-			{
-				m_avg = F64(m_total) / m_count;
-				m_count = 0;
-				m_total = 0.0;
-			}
-
-			return m_avg;
-		}
-
-	private:
-		T m_total = T(0);
-		F64 m_avg = 0.0;
-		U32 m_count = 0;
-	};
-
-	GenericMemoryPoolAllocator<U8> m_alloc;
-
-	BufferedValue<Second> m_frameTime;
-	BufferedValue<Second> m_renderTime;
-	BufferedValue<Second> m_sceneUpdateTime;
-	BufferedValue<Second> m_visTestsTime;
-	BufferedValue<Second> m_physicsTime;
-	BufferedValue<Second> m_gpuTime;
-
-	PtrSize m_allocatedCpuMem = 0;
-	U64 m_allocCount = 0;
-	U64 m_freeCount = 0;
-
-	U64 m_vkCpuMem = 0;
-	U64 m_vkGpuMem = 0;
-	U32 m_vkCmdbCount = 0;
-
-	PtrSize m_drawableCount = 0;
-
-	static const U32 BUFFERED_FRAMES = 16;
-	U32 m_bufferedFrames = 0;
-
-	StatsUi(const GenericMemoryPoolAllocator<U8>& alloc)
-		: m_alloc(alloc)
-	{
-	}
-
-	void build(CanvasPtr canvas)
-	{
-		// Misc
-		++m_bufferedFrames;
-		Bool flush = false;
-		if(m_bufferedFrames == BUFFERED_FRAMES)
-		{
-			flush = true;
-			m_bufferedFrames = 0;
-		}
-
-		// Start drawing the UI
-		canvas->pushFont(canvas->getDefaultFont(), 16);
-
-		const Vec4 oldWindowColor = ImGui::GetStyle().Colors[ImGuiCol_WindowBg];
-		ImGui::GetStyle().Colors[ImGuiCol_WindowBg].w = 0.3f;
-
-		if(ImGui::Begin("Stats", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_AlwaysAutoResize))
-		{
-			ImGui::SetWindowPos(Vec2(5.0f, 5.0f));
-			ImGui::SetWindowSize(Vec2(230.0f, 450.0f));
-
-			ImGui::Text("CPU Time:");
-			labelTime(m_frameTime.get(flush), "Total frame");
-			labelTime(m_renderTime.get(flush), "Renderer");
-			labelTime(m_sceneUpdateTime.get(flush), "Scene update");
-			labelTime(m_visTestsTime.get(flush), "Visibility");
-			labelTime(m_physicsTime.get(flush), "Physics");
-
-			ImGui::Text("----");
-			ImGui::Text("GPU Time:");
-			labelTime(m_gpuTime.get(flush), "Total frame");
-
-			ImGui::Text("----");
-			ImGui::Text("Memory:");
-			labelBytes(m_allocatedCpuMem, "Total CPU");
-			labelUint(m_allocCount, "Total allocations");
-			labelUint(m_freeCount, "Total frees");
-			labelBytes(m_vkCpuMem, "Vulkan CPU");
-			labelBytes(m_vkGpuMem, "Vulkan GPU");
-
-			ImGui::Text("----");
-			ImGui::Text("Vulkan:");
-			labelUint(m_vkCmdbCount, "Cmd buffers");
-
-			ImGui::Text("----");
-			ImGui::Text("Other:");
-			labelUint(m_drawableCount, "Drawbles");
-		}
-
-		ImGui::End();
-		ImGui::GetStyle().Colors[ImGuiCol_WindowBg] = oldWindowColor;
-
-		canvas->popFont();
-	}
-
-	void labelTime(Second val, CString name)
-	{
-		ImGui::Text("%s: %fms", name.cstr(), val * 1000.0);
-	}
-
-	void labelBytes(PtrSize val, CString name)
-	{
-		PtrSize gb, mb, kb, b;
-
-		gb = val / 1_GB;
-		val -= gb * 1_GB;
-
-		mb = val / 1_MB;
-		val -= mb * 1_MB;
-
-		kb = val / 1_KB;
-		val -= kb * 1_KB;
-
-		b = val;
-
-		StringAuto timestamp(m_alloc);
-		if(gb)
-		{
-			timestamp.sprintf("%s: %4u,%04u,%04u,%04u", name.cstr(), gb, mb, kb, b);
-		}
-		else if(mb)
-		{
-			timestamp.sprintf("%s: %4u,%04u,%04u", name.cstr(), mb, kb, b);
-		}
-		else if(kb)
-		{
-			timestamp.sprintf("%s: %4u,%04u", name.cstr(), kb, b);
-		}
-		else
-		{
-			timestamp.sprintf("%s: %4u", name.cstr(), b);
-		}
-		ImGui::TextUnformatted(timestamp.cstr());
-	}
-
-	void labelUint(U64 val, CString name)
-	{
-		ImGui::Text("%s: %lu", name.cstr(), val);
-	}
-};
-
 void* App::MemStats::allocCallback(void* userData, void* ptr, PtrSize size, PtrSize alignment)
 {
 	ANKI_ASSERT(userData);
@@ -269,7 +111,7 @@ App::~App()
 
 void App::cleanup()
 {
-	m_heapAlloc.deleteInstance(m_statsUi);
+	m_statsUi.reset(nullptr);
 	m_console.reset(nullptr);
 
 	m_heapAlloc.deleteInstance(m_scene);
@@ -292,6 +134,8 @@ void App::cleanup()
 	m_vertexMem = nullptr;
 	m_heapAlloc.deleteInstance(m_threadHive);
 	m_threadHive = nullptr;
+	m_heapAlloc.deleteInstance(m_maliHwCounters);
+	m_maliHwCounters = nullptr;
 	GrManager::deleteInstance(m_gr);
 	m_gr = nullptr;
 	Input::deleteInstance(m_input);
@@ -421,6 +265,14 @@ Error App::initInternal(const ConfigSet& config_, AllocAlignedCallback allocCb,
 
 	ANKI_CHECK(GrManager::newInstance(grInit, m_gr));
 
+	//
+	// Mali HW counters
+	//
+	if(m_gr->getDeviceCapabilities().m_gpuVendor == GpuVendor::ARM)
+	{
+		m_maliHwCounters = m_heapAlloc.newInstance<MaliHwCounters>(m_heapAlloc);
+	}
+
 	//
 	// GPU mem
 	//
@@ -500,7 +352,7 @@ Error App::initInternal(const ConfigSet& config_, AllocAlignedCallback allocCb,
 	//
 	// Misc
 	//
-	m_statsUi = m_heapAlloc.newInstance<StatsUi>(m_heapAlloc);
+	ANKI_CHECK(m_ui->newInstance<StatsUi>(m_statsUi));
 	ANKI_CHECK(m_ui->newInstance<DeveloperConsole>(m_console, m_allocCb, m_allocCbData, m_script));
 
 	ANKI_CORE_LOGI("Application initialized");
@@ -616,22 +468,35 @@ Error App::mainLoop()
 			// Stats
 			if(m_displayStats)
 			{
-				m_statsUi->m_frameTime.set(frameTime);
-				m_statsUi->m_renderTime.set(m_renderer->getStats().m_renderingCpuTime);
-				m_statsUi->m_sceneUpdateTime.set(m_scene->getStats().m_updateTime);
-				m_statsUi->m_visTestsTime.set(m_scene->getStats().m_visibilityTestsTime);
-				m_statsUi->m_physicsTime.set(m_scene->getStats().m_physicsUpdate);
-				m_statsUi->m_gpuTime.set(m_renderer->getStats().m_renderingGpuTime);
-				m_statsUi->m_allocatedCpuMem = m_memStats.m_allocatedMem.load();
-				m_statsUi->m_allocCount = m_memStats.m_allocCount.load();
-				m_statsUi->m_freeCount = m_memStats.m_freeCount.load();
-
+				StatsUi& statsUi = *static_cast<StatsUi*>(m_statsUi.get());
+
+				statsUi.setFrameTime(frameTime);
+				statsUi.setRenderTime(m_renderer->getStats().m_renderingCpuTime);
+				statsUi.setSceneUpdateTime(m_scene->getStats().m_updateTime);
+				statsUi.setVisibilityTestsTime(m_scene->getStats().m_visibilityTestsTime);
+				statsUi.setPhysicsTime(m_scene->getStats().m_physicsUpdate);
+
+				statsUi.setGpuTime(m_renderer->getStats().m_renderingGpuTime);
+				if(m_maliHwCounters)
+				{
+					MaliHwCountersOut out;
+					m_maliHwCounters->sample(out);
+
+					statsUi.setGpuActiveCycles(out.m_gpuActive);
+					statsUi.setGpuReadBandwidth(out.m_readBandwidth);
+					statsUi.setGpuWriteBandwidth(out.m_writeBandwidth);
+				}
+
+				statsUi.setAllocatedCpuMemory(m_memStats.m_allocatedMem.load());
+				statsUi.setCpuAllocationCount(m_memStats.m_allocCount.load());
+				statsUi.setCpuFreeCount(m_memStats.m_freeCount.load());
 				GrManagerStats grStats = m_gr->getStats();
-				m_statsUi->m_vkCpuMem = grStats.m_cpuMemory;
-				m_statsUi->m_vkGpuMem = grStats.m_gpuMemory;
-				m_statsUi->m_vkCmdbCount = grStats.m_commandBufferCount;
+				statsUi.setVkCpuMemory(grStats.m_cpuMemory);
+				statsUi.setVkGpuMemory(grStats.m_gpuMemory);
+
+				statsUi.setVkCommandBufferCount(grStats.m_commandBufferCount);
 
-				m_statsUi->m_drawableCount = rqueue.countAllRenderables();
+				statsUi.setDrawableCount(rqueue.countAllRenderables());
 			}
 
 #if ANKI_ENABLE_TRACE
@@ -673,7 +538,7 @@ void App::injectUiElements(DynamicArrayAuto<UiQueueElement>& newUiElementArr, Re
 	U32 count = originalCount;
 	if(m_displayStats)
 	{
-		newUiElementArr[count].m_userData = m_statsUi;
+		newUiElementArr[count].m_userData = m_statsUi.get();
 		newUiElementArr[count].m_drawCallback = [](CanvasPtr& canvas, void* userData) -> void {
 			static_cast<StatsUi*>(userData)->build(canvas);
 		};

+ 3 - 3
AnKi/Core/App.h

@@ -31,6 +31,7 @@ class VertexGpuMemoryPool;
 class UiManager;
 class UiQueueElement;
 class RenderQueue;
+class MaliHwCounters;
 
 /// The core class of the engine.
 class App
@@ -157,8 +158,6 @@ public:
 	}
 
 private:
-	class StatsUi;
-
 	// Allocation
 	AllocAlignedCallback m_allocCb;
 	void* m_allocCbData;
@@ -171,6 +170,7 @@ private:
 	NativeWindow* m_window = nullptr;
 	Input* m_input = nullptr;
 	GrManager* m_gr = nullptr;
+	MaliHwCounters* m_maliHwCounters = nullptr;
 	VertexGpuMemoryPool* m_vertexMem = nullptr;
 	StagingGpuMemoryPool* m_stagingMem = nullptr;
 	PhysicsWorld* m_physics = nullptr;
@@ -182,7 +182,7 @@ private:
 	ScriptManager* m_script = nullptr;
 
 	// Misc
-	StatsUi* m_statsUi = nullptr;
+	UiImmediateModeBuilderPtr m_statsUi;
 	Bool m_displayStats = false;
 	UiImmediateModeBuilderPtr m_console;
 	Bool m_consoleEnabled = false;

+ 2 - 1
AnKi/Core/CMakeLists.txt

@@ -1,4 +1,5 @@
-set(SOURCES App.cpp ConfigSet.cpp GpuMemoryPools.cpp DeveloperConsole.cpp CoreTracer.cpp)
+set(SOURCES App.cpp ConfigSet.cpp GpuMemoryPools.cpp DeveloperConsole.cpp CoreTracer.cpp MaliHwCounters.cpp
+	StatsUi.cpp)
 file(GLOB HEADERS *.h)
 
 if(SDL)

+ 1 - 0
AnKi/Core/Common.h

@@ -6,6 +6,7 @@
 #pragma once
 
 #include <AnKi/Config.h>
+#include <AnKi/Util/StdTypes.h>
 
 namespace anki {
 

+ 67 - 0
AnKi/Core/MaliHwCounters.cpp

@@ -0,0 +1,67 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Core/MaliHwCounters.h>
+
+#define ANKI_HWCPIPE_ENABLE (ANKI_OS_ANDROID == 1)
+
+#if ANKI_HWCPIPE_ENABLE
+#	include <ThirdParty/HwcPipe/hwcpipe.h>
+#endif
+
+namespace anki {
+
+/// Store the allocator and, if HWCPipe support is compiled in, create the HWCPipe instance, enable the GPU counters
+/// that sample() reads and start profiling.
+MaliHwCounters::MaliHwCounters(GenericMemoryPoolAllocator<U8> alloc)
+	: m_alloc(alloc)
+{
+#if ANKI_HWCPIPE_ENABLE
+	using Counter = hwcpipe::GpuCounter;
+
+	hwcpipe::HWCPipe* pipe = m_alloc.newInstance<hwcpipe::HWCPipe>();
+
+	// Request only the cycle counter and the two external memory byte counters
+	pipe->set_enabled_gpu_counters(
+		{Counter::GpuCycles, Counter::ExternalMemoryWriteBytes, Counter::ExternalMemoryReadBytes});
+	pipe->run();
+
+	// m_impl is declared as void*, it holds the HWCPipe object from now on
+	m_impl = pipe;
+#else
+	(void)m_impl; // Shut up the compiler
+#endif
+}
+
+/// Stop profiling and destroy the HWCPipe instance. Does nothing when HWCPipe support is compiled out.
+MaliHwCounters::~MaliHwCounters()
+{
+#if ANKI_HWCPIPE_ENABLE
+	auto* pipe = static_cast<hwcpipe::HWCPipe*>(m_impl);
+	m_impl = nullptr;
+
+	pipe->stop();
+	m_alloc.deleteInstance(pipe);
+#endif
+}
+
+/// Sample the GPU counters.
+/// @param[out] out Always reset to its default values first. If HWCPipe support is compiled in and the sample contains
+///                 GPU measurements it is then filled with them. A counter that is missing from the measurements is
+///                 reported as 0.
+void MaliHwCounters::sample(MaliHwCountersOut& out)
+{
+	out = {};
+
+#if ANKI_HWCPIPE_ENABLE
+	hwcpipe::HWCPipe* hwc = static_cast<hwcpipe::HWCPipe*>(m_impl);
+
+	const hwcpipe::Measurements m = hwc->sample();
+
+	if(m.gpu)
+	{
+		// A failed lookup must never dereference end(). HWCPipe's own usage example checks the iterator against end()
+		// before reading the value, so treat a missing counter as a runtime condition and not as a programming error.
+		auto readCounter = [&](hwcpipe::GpuCounter counter) -> U64 {
+			const auto it = m.gpu->find(counter);
+			if(it == m.gpu->end())
+			{
+				return 0;
+			}
+
+			const hwcpipe::Value val = it->second;
+			return val.get<U64>();
+		};
+
+		out.m_gpuActive = readCounter(hwcpipe::GpuCounter::GpuCycles);
+		out.m_readBandwidth = readCounter(hwcpipe::GpuCounter::ExternalMemoryReadBytes);
+		out.m_writeBandwidth = readCounter(hwcpipe::GpuCounter::ExternalMemoryWriteBytes);
+	}
+#endif
+}
+
+} // end namespace anki

+ 46 - 0
AnKi/Core/MaliHwCounters.h

@@ -0,0 +1,46 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Core/Common.h>
+#include <AnKi/Util/Allocator.h>
+
+namespace anki {
+
+/// @addtogroup core
+/// @{
+
+/// The HW counter values that MaliHwCounters::sample() writes. Every member defaults to zero.
+/// @memberof MaliHwCounters
+class MaliHwCountersOut
+{
+public:
+	U64 m_gpuActive = 0; ///< Value of the GpuCycles counter.
+	PtrSize m_readBandwidth = 0; ///< In bytes.
+	PtrSize m_writeBandwidth = 0; ///< In bytes.
+};
+
+/// Sample HW counters for Mali GPUs. The object is neither copy-constructible nor copy-assignable.
+class MaliHwCounters
+{
+public:
+	MaliHwCounters(GenericMemoryPoolAllocator<U8> alloc); ///< @param alloc The object keeps a copy of it.
+
+	MaliHwCounters(const MaliHwCounters&) = delete; // Non-copyable
+
+	~MaliHwCounters();
+
+	MaliHwCounters& operator=(const MaliHwCounters&) = delete; // Non-copyable
+
+	void sample(MaliHwCountersOut& out); ///< @param[out] out Receives the sampled counter values.
+
+private:
+	GenericMemoryPoolAllocator<U8> m_alloc;
+	void* m_impl = nullptr; ///< Opaque pointer to the backend object. Stays null if there is no backend.
+};
+/// @}
+
+} // end namespace anki

+ 122 - 0
AnKi/Core/StatsUi.cpp

@@ -0,0 +1,122 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#include <AnKi/Core/StatsUi.h>
+#include <AnKi/Ui/UiManager.h>
+#include <AnKi/Ui/Font.h>
+#include <AnKi/Ui/Canvas.h>
+
+namespace anki {
+
+/// Nothing to clean up explicitly, the members release themselves.
+StatsUi::~StatsUi() = default;
+
+Error StatsUi::init()
+{
+	ANKI_CHECK(m_manager->newInstance(m_font, "EngineAssets/UbuntuMonoRegular.ttf", Array<U32, 1>{24})); // Font used by build(); 24 is the same value build() passes to pushFont()
+
+	return Error::NONE; // NOTE(review): presumably ANKI_CHECK returns early on error - confirm
+}
+
+/// Draw an ImGui text line of the form "<name>: <value>". The value is split into its GB, MB, KB and byte components
+/// which are printed separated by commas. Leading components that are zero are omitted.
+/// @param val The number of bytes to print.
+/// @param name The label printed in front of the value.
+void StatsUi::labelBytes(PtrSize val, CString name) const
+{
+	const PtrSize gb = val / 1_GB;
+	val -= gb * 1_GB;
+
+	const PtrSize mb = val / 1_MB;
+	val -= mb * 1_MB;
+
+	const PtrSize kb = val / 1_KB;
+	val -= kb * 1_KB;
+
+	const PtrSize b = val;
+
+	// Arguments of a printf-style variadic call must match their conversion specifier exactly. Passing PtrSize to "%u"
+	// is a mismatch wherever PtrSize is wider than unsigned int, so convert explicitly and print with "%llu".
+	using Ull = unsigned long long;
+
+	StringAuto text(getAllocator());
+	if(gb)
+	{
+		text.sprintf("%s: %4llu,%04llu,%04llu,%04llu", name.cstr(), Ull(gb), Ull(mb), Ull(kb), Ull(b));
+	}
+	else if(mb)
+	{
+		text.sprintf("%s: %4llu,%04llu,%04llu", name.cstr(), Ull(mb), Ull(kb), Ull(b));
+	}
+	else if(kb)
+	{
+		text.sprintf("%s: %4llu,%04llu", name.cstr(), Ull(kb), Ull(b));
+	}
+	else
+	{
+		text.sprintf("%s: %4llu", name.cstr(), Ull(b));
+	}
+	ImGui::TextUnformatted(text.cstr());
+}
+
+/// Draw the stats window. Every BUFFERED_FRAMES-th call flushes the buffered values so the displayed averages get
+/// refreshed.
+/// @param canvas The canvas that supplies the font stack used while drawing.
+void StatsUi::build(CanvasPtr canvas)
+{
+	// Misc
+	++m_bufferedFrames;
+	Bool flush = false;
+	if(m_bufferedFrames == BUFFERED_FRAMES)
+	{
+		flush = true;
+		m_bufferedFrames = 0;
+	}
+
+	// Start drawing the UI
+	canvas->pushFont(m_font, 24);
+
+	// Make the window background translucent for this window only, the old color is restored further down
+	const Vec4 oldWindowColor = ImGui::GetStyle().Colors[ImGuiCol_WindowBg];
+	ImGui::GetStyle().Colors[ImGuiCol_WindowBg].w = 0.3f;
+
+	if(ImGui::Begin("Stats", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_AlwaysAutoResize))
+	{
+		ImGui::SetWindowPos(Vec2(5.0f, 5.0f));
+		ImGui::SetWindowSize(Vec2(230.0f, 450.0f));
+
+		ImGui::Text("CPU Time:");
+		labelTime(m_frameTime.get(flush), "Total frame");
+		labelTime(m_renderTime.get(flush), "Renderer");
+		labelTime(m_sceneUpdateTime.get(flush), "Scene update");
+		labelTime(m_visTestsTime.get(flush), "Visibility");
+		labelTime(m_physicsTime.get(flush), "Physics");
+
+		ImGui::Text("----");
+		ImGui::Text("GPU:");
+		labelTime(m_gpuTime.get(flush), "Total frame");
+
+		// Read all the HW counter values unconditionally. get() is what resets the accumulators on a flush, skipping
+		// it for the bandwidth values when the cycle count is zero would carry stale samples into the next average.
+		const U64 gpuActive = m_gpuActive.get(flush);
+		const PtrSize gpuReadBandwidth = m_gpuReadBandwidth.get(flush);
+		const PtrSize gpuWriteBandwidth = m_gpuWriteBandwidth.get(flush);
+		if(gpuActive)
+		{
+			// The cast makes the argument match "%llu" on every platform
+			ImGui::Text("%s: %lluK cycles", "Active Cycles", static_cast<unsigned long long>(gpuActive / 1000));
+			labelBytes(gpuReadBandwidth, "Read bandwidth");
+			labelBytes(gpuWriteBandwidth, "Write bandwidth");
+		}
+
+		ImGui::Text("----");
+		ImGui::Text("Memory:");
+		labelBytes(m_allocatedCpuMem, "Total CPU");
+		labelUint(m_allocCount, "Total allocations");
+		labelUint(m_freeCount, "Total frees");
+		labelBytes(m_vkCpuMem, "Vulkan CPU");
+		labelBytes(m_vkGpuMem, "Vulkan GPU");
+
+		ImGui::Text("----");
+		ImGui::Text("Vulkan:");
+		labelUint(m_vkCmdbCount, "Cmd buffers");
+
+		ImGui::Text("----");
+		ImGui::Text("Other:");
+		labelUint(m_drawableCount, "Drawables");
+	}
+
+	// End() is called regardless of what Begin() returned
+	ImGui::End();
+	ImGui::GetStyle().Colors[ImGuiCol_WindowBg] = oldWindowColor;
+
+	canvas->popFont();
+}
+
+} // end namespace anki

+ 182 - 0
AnKi/Core/StatsUi.h

@@ -0,0 +1,182 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+#pragma once
+
+#include <AnKi/Core/Common.h>
+#include <AnKi/Ui/UiImmediateModeBuilder.h>
+
+namespace anki {
+
+/// @addtogroup core
+/// @{
+
+/// Immediate mode UI builder that draws the stats window: CPU and GPU timings, GPU HW counters, memory, Vulkan and
+/// other counters. The values are fed through the set*() methods and drawn by build().
+class StatsUi : public UiImmediateModeBuilder
+{
+public:
+	StatsUi(UiManager* ui)
+		: UiImmediateModeBuilder(ui)
+	{
+	}
+
+	~StatsUi();
+
+	/// Create the font that build() uses.
+	ANKI_USE_RESULT Error init();
+
+	/// Draw the stats window.
+	void build(CanvasPtr ctx) override;
+
+	/// @name Buffered values. What is displayed is their average over BUFFERED_FRAMES samples.
+	/// @{
+	void setFrameTime(Second v)
+	{
+		m_frameTime.set(v);
+	}
+
+	void setRenderTime(Second v)
+	{
+		m_renderTime.set(v);
+	}
+
+	void setSceneUpdateTime(Second v)
+	{
+		m_sceneUpdateTime.set(v);
+	}
+
+	void setVisibilityTestsTime(Second v)
+	{
+		m_visTestsTime.set(v);
+	}
+
+	void setPhysicsTime(Second v)
+	{
+		m_physicsTime.set(v);
+	}
+
+	void setGpuTime(Second v)
+	{
+		m_gpuTime.set(v);
+	}
+
+	void setGpuActiveCycles(U64 v)
+	{
+		m_gpuActive.set(v);
+	}
+
+	void setGpuReadBandwidth(PtrSize v)
+	{
+		m_gpuReadBandwidth.set(v);
+	}
+
+	void setGpuWriteBandwidth(PtrSize v)
+	{
+		m_gpuWriteBandwidth.set(v);
+	}
+	/// @}
+
+	/// @name Unbuffered values. What is displayed is the last value that was set.
+	/// @{
+	void setAllocatedCpuMemory(PtrSize v)
+	{
+		m_allocatedCpuMem = v;
+	}
+
+	void setCpuAllocationCount(U64 v)
+	{
+		m_allocCount = v;
+	}
+
+	void setCpuFreeCount(U64 v)
+	{
+		m_freeCount = v;
+	}
+
+	void setVkCpuMemory(PtrSize v)
+	{
+		m_vkCpuMem = v;
+	}
+
+	void setVkGpuMemory(PtrSize v)
+	{
+		m_vkGpuMem = v;
+	}
+
+	void setVkCommandBufferCount(U32 v)
+	{
+		m_vkCmdbCount = v;
+	}
+
+	void setDrawableCount(U64 v)
+	{
+		m_drawableCount = v;
+	}
+	/// @}
+
+private:
+	/// The number of build() calls between two flushes of the buffered values.
+	static constexpr U32 BUFFERED_FRAMES = 16;
+
+	/// Averages the samples given to set(). The average is computed as if exactly BUFFERED_FRAMES samples were added
+	/// between two flushes.
+	template<typename T>
+	class BufferedValue
+	{
+	public:
+		/// Add one sample to the window that is being accumulated.
+		void set(T x)
+		{
+			m_total += x;
+		}
+
+		/// @param flush If true publish the average of the accumulated window and start a new window.
+		/// @return The last published average.
+		T get(Bool flush)
+		{
+			if(flush)
+			{
+				// Divide once per window and not once per sample, otherwise integer types drop the remainder of
+				// every single sample
+				m_avg = m_total / T(BUFFERED_FRAMES);
+				m_total = T(0);
+			}
+
+			return m_avg;
+		}
+
+	private:
+		T m_total = T(0); ///< Sum of the samples of the current window.
+		T m_avg = T(0); ///< The average published by the last flush.
+	};
+
+	FontPtr m_font;
+	U32 m_bufferedFrames = 0; ///< build() calls since the last flush.
+
+	// CPU
+	BufferedValue<Second> m_frameTime;
+	BufferedValue<Second> m_renderTime;
+	BufferedValue<Second> m_sceneUpdateTime;
+	BufferedValue<Second> m_visTestsTime;
+	BufferedValue<Second> m_physicsTime;
+
+	// GPU
+	BufferedValue<Second> m_gpuTime;
+	BufferedValue<U64> m_gpuActive;
+	BufferedValue<PtrSize> m_gpuReadBandwidth;
+	BufferedValue<PtrSize> m_gpuWriteBandwidth;
+
+	// Memory
+	PtrSize m_allocatedCpuMem = 0;
+	U64 m_allocCount = 0;
+	U64 m_freeCount = 0;
+	PtrSize m_vkCpuMem = 0;
+	PtrSize m_vkGpuMem = 0;
+
+	// Vulkan
+	U32 m_vkCmdbCount = 0;
+
+	// Other
+	PtrSize m_drawableCount = 0;
+
+	/// Draw "<name>: <val in milliseconds>ms".
+	static void labelTime(Second val, CString name)
+	{
+		ImGui::Text("%s: %fms", name.cstr(), val * 1000.0);
+	}
+
+	/// Draw "<name>: <val>".
+	static void labelUint(U64 val, CString name)
+	{
+		// "%lu" only matches U64 where long is 64bit. Cast and use "%llu" which is correct everywhere
+		ImGui::Text("%s: %llu", name.cstr(), static_cast<unsigned long long>(val));
+	}
+
+	/// Draw "<name>: <val>" with the value split into GB, MB, KB and byte components.
+	void labelBytes(PtrSize val, CString name) const;
+};
+/// @}
+
+} // end namespace anki

+ 5 - 1
CMakeLists.txt

@@ -238,7 +238,7 @@ endif()
 ################################################################################
 # Thirdparty                                                                   #
 ################################################################################
-set(ANKI_EXTERN_SUB_DIRS TinyXml2 Lua ZLib Bullet ImGui MeshOptimizer SprivCross)
+set(ANKI_EXTERN_SUB_DIRS TinyXml2 Lua ZLib Bullet ImGui MeshOptimizer SprivCross HwcPipe)
 
 # Bullet config
 option(BUILD_BULLET2_DEMOS OFF)
@@ -420,6 +420,10 @@ if(SDL)
 	set(THIRD_PARTY_LIBS ${THIRD_PARTY_LIBS} SDL2-static)
 endif()
 
+if(ANDROID)
+	set(THIRD_PARTY_LIBS ${THIRD_PARTY_LIBS} AnKiHwcPipe)
+endif()
+
 set(THIRD_PARTY_LIBS ${THIRD_PARTY_LIBS} BulletSoftBody BulletDynamics BulletCollision LinearMath
 	AnKiSpirvCross AnKiTinyXml2 AnKiLua AnKiMeshOptimizer AnKiZLib glslang SPIRV OGLCompiler OSDependent AnKiImGui)
 

+ 4 - 0
ThirdParty/HwcPipe/CMakeLists.txt

@@ -0,0 +1,4 @@
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+add_definitions("-DHWCPIPE_NO_JSON")
+add_library(AnKiHwcPipe "hwcpipe.cpp" "vendor/arm/mali/mali_profiler.cpp" "vendor/arm/pmu/pmu_counter.cpp" "vendor/arm/pmu/pmu_profiler.cpp")
+

+ 21 - 0
ThirdParty/HwcPipe/LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 Arm Software
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 156 - 0
ThirdParty/HwcPipe/README.md

@@ -0,0 +1,156 @@
+<!--
+- Copyright (c) 2019, Arm Limited and Contributors
+-
+- SPDX-License-Identifier: MIT
+-
+- Permission is hereby granted, free of charge,
+- to any person obtaining a copy of this software and associated documentation files (the "Software"),
+- to deal in the Software without restriction, including without limitation the rights to
+- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+- and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+-
+- The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+-
+- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+- INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+-
+-->
+
+# HWCPipe
+
+## Introduction
+
+HWCPipe is a simple and extensible interface for reading CPU and GPU hardware counters.
+
+## License
+
+The software is provided under an MIT license.
+
+This project has a third-party dependency, which may have independent licensing:
+
+- [nlohmann/json](https://github.com/nlohmann/json): A JSON library for modern C++
+
+## Contributions
+
+All contributions are accepted under the same [LICENSE](LICENSE).
+
+## Building
+
+To use HWCPipe, build it as a shared library in your project.
+
+If your project uses CMake, you can add the following to your `CMakeLists.txt`:
+
+```
+add_subdirectory(hwcpipe)
+```
+
+## Usage
+
+### Using HWCPipe
+
+Basic usage for HWCPipe is simple:
+
+```
+// HWCPipe performs automated platform detection for CPU/GPU counters
+hwcpipe::HWCPipe h;
+
+// Start HWCPipe once at the beginning of the profiling session
+h.run();
+
+while (main_loop) {
+    // Call sample() to sample counters with the frequency you need
+    auto measurements = h.sample();
+
+    [...]
+}
+
+// At the end of the profiling session, stop HWCPipe
+h.stop();
+```
+
+The `sample` function returns a `Measurements` struct, which can be accessed like this:
+
+```
+// Check if CPU measurements are available
+if (measurements.cpu)
+{
+    // Look for a counter in the map
+    const auto &counter = measurements.cpu->find(CpuCounter::Cycles);
+    if (counter != measurements.cpu->end())
+    {
+        // Get the data stored in the counter, cast to the type you need
+        auto value = counter->second.get<float>();
+    }
+}
+```
+
+### Enabling counters
+
+The available counters are specified in the `CpuCounter` and `GpuCounter` enums (`cpu_profiler.h` and `gpu_profiler.h` respectively).
+
+Platforms will support a subset of these counters, which can be queried via:
+
+```
+auto cpu_counters = h.cpu_profiler()->supported_counters();
+auto gpu_counters = h.gpu_profiler()->supported_counters();
+```
+
+You can specify the counters to be enabled in the following ways:
+
+```
+// Enable a default set of counters
+auto h = hwcpipe::HWCPipe();
+
+// Pass sets of CPU and GPU counters to be enabled
+auto h = hwcpipe::HWCPipe({CpuCounter::Cycles, CpuCounter::Instructions}, {GpuCounter::GpuCycles});
+
+// Pass a JSON string
+auto h = hwcpipe::HWCPipe(json);
+```
+
+The JSON string should be formatted like this:
+
+```
+{
+    "cpu": ["Cycles", "Instructions"],
+    "gpu": ["GpuCycles"]
+}
+```
+
+Available counter names can be found in `cpu_counter_names` (`cpu_profiler.h`) and `gpu_counter_names` (`gpu_profiler.h`).
+
+For more information regarding Mali counters, see [Mali Performance Counters](https://community.arm.com/graphics/b/blog/posts/mali-bifrost-family-performance-counters).
+
+### Enabling profiling on Android
+
+In order for performance data to be displayed, profiling needs to be enabled on the device.
+Some devices may disable it by default.
+
+Profiling can be enabled via `adb`:
+
+```
+adb shell setprop security.perf_harden 0
+```
+
+## Adding support for a new platform
+
+If the counters provided in `CpuCounter` and `GpuCounter` are enough for the new platform,
+the process is simple:
+
+* Add an implementation of either `CpuProfiler` or `GpuProfiler` (you can use `PmuProfiler` and `MaliProfiler` as references).
+* Add your platform to the automated platform detection in `hwcpipe.cpp`. For consistency in platform detection, the constructor for your platform should throw if the platform is not available.
+* Add your platform to the build system.
+
+### Adding new counters
+
+If you need to add new counters to the existing ones, you should update the following variables:
+
+* Add the counter to the `CpuCounter`/`GpuCounter` enum.
+* Add the counter name to the `cpu_counter_names`/`gpu_counter_names` map (necessary for JSON initialization).
+* Add a description and the unit for your counter to the `cpu_counter_info`/`gpu_counter_info` map.
+
+The `CpuCounter` and `GpuCounter` enums are meant to be expanded. Platforms must not break if new counters are added.

+ 151 - 0
ThirdParty/HwcPipe/cpu_profiler.h

@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include "value.h"
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace hwcpipe
+{
+// The available CPU counters. Profiler implementations will support a subset of them.
+enum class CpuCounter
+{
+	Cycles,
+	Instructions,
+	CacheReferences,
+	CacheMisses,
+	BranchInstructions,
+	BranchMisses,
+
+	L1Accesses,
+	InstrRetired,
+	L2Accesses,
+	L3Accesses,
+	BusReads,
+	BusWrites,
+	MemReads,
+	MemWrites,
+	ASESpec,
+	VFPSpec,
+	CryptoSpec,
+
+	MaxValue // Last enumerator, it has no entry in the name and info tables. Presumably a sentinel/count - confirm.
+};
+
+// Mapping from CPU counter names to enum values. Used for JSON initialization. The key of every entry is spelled exactly like its enumerator.
+const std::unordered_map<std::string, CpuCounter> cpu_counter_names{
+    {"Cycles", CpuCounter::Cycles},
+    {"Instructions", CpuCounter::Instructions},
+    {"CacheReferences", CpuCounter::CacheReferences},
+    {"CacheMisses", CpuCounter::CacheMisses},
+    {"BranchInstructions", CpuCounter::BranchInstructions},
+    {"BranchMisses", CpuCounter::BranchMisses},
+
+    {"L1Accesses", CpuCounter::L1Accesses},
+    {"InstrRetired", CpuCounter::InstrRetired},
+    {"L2Accesses", CpuCounter::L2Accesses},
+    {"L3Accesses", CpuCounter::L3Accesses},
+    {"BusReads", CpuCounter::BusReads},
+    {"BusWrites", CpuCounter::BusWrites},
+    {"MemReads", CpuCounter::MemReads},
+    {"MemWrites", CpuCounter::MemWrites},
+    {"ASESpec", CpuCounter::ASESpec},
+    {"VFPSpec", CpuCounter::VFPSpec},
+    {"CryptoSpec", CpuCounter::CryptoSpec},
+};
+
+// A hash function for CpuCounter values. The hash is the value itself converted to std::size_t.
+struct CpuCounterHash
+{
+	template <typename T>
+	std::size_t operator()(T t) const
+	{
+		return static_cast<std::size_t>(t);
+	}
+};
+
+struct CpuCounterInfo
+{
+	std::string desc; // Description of the counter
+	std::string unit; // Unit of the counter's value, e.g. "cycles"
+};
+
+// Mapping from each counter to its corresponding information (description and unit). Each entry is {counter, {description, unit}}.
+const std::unordered_map<CpuCounter, CpuCounterInfo, CpuCounterHash> cpu_counter_info{
+    {CpuCounter::Cycles, {"Number of CPU cycles", "cycles"}},
+    {CpuCounter::Instructions, {"Number of CPU instructions", "instructions"}},
+    {CpuCounter::CacheReferences, {"Number of cache references", "references"}},
+    {CpuCounter::CacheMisses, {"Number of cache misses", "misses"}},
+    {CpuCounter::BranchInstructions, {"Number of branch instructions", "instructions"}},
+    {CpuCounter::BranchMisses, {"Number of branch misses", "misses"}},
+
+    {CpuCounter::L1Accesses, {"L1 data cache accesses", "accesses"}},
+    {CpuCounter::InstrRetired, {"All retired instructions", "instructions"}},
+    {CpuCounter::L2Accesses, {"L2 data cache accesses", "accesses"}},
+    {CpuCounter::L3Accesses, {"L3 data cache accesses", "accesses"}},
+    {CpuCounter::BusReads, {"Bus access reads", "beats"}},
+    {CpuCounter::BusWrites, {"Bus access writes", "beats"}},
+    {CpuCounter::MemReads, {"Data memory access, load instructions", "instructions"}},
+    {CpuCounter::MemWrites, {"Data memory access, store instructions", "instructions"}},
+    {CpuCounter::ASESpec, {"Speculatively executed SIMD operations", "operations"}},
+    {CpuCounter::VFPSpec, {"Speculatively executed floating point operations", "operations"}},
+    {CpuCounter::CryptoSpec, {"Speculatively executed cryptographic operations", "operations"}},
+};
+
+typedef std::unordered_set<CpuCounter, CpuCounterHash> CpuCounterSet; // A set of CPU counters
+typedef std::unordered_map<CpuCounter, Value, CpuCounterHash>
+    CpuMeasurements; // Maps a counter to its Value
+
+/** An interface for classes that collect CPU performance data. Every operation is pure virtual. */
+class CpuProfiler
+{
+  public:
+	virtual ~CpuProfiler() = default;
+
+	// Returns the enabled counters
+	virtual const CpuCounterSet &enabled_counters() const = 0;
+
+	// Returns the counters that the platform supports
+	virtual const CpuCounterSet &supported_counters() const = 0;
+
+	// Sets the enabled counters after initialization
+	virtual void set_enabled_counters(CpuCounterSet counters) = 0;
+
+	// Starts a profiling session
+	virtual void run() = 0;
+
+	// Sample the counters. Returns a map of measurements for the counters
+	// that are both available and enabled, so a requested counter may be absent from the map.
+	// A profiling session must be running when sampling the counters.
+	virtual const CpuMeasurements &sample() = 0;
+
+	// Stops the active profiling session
+	virtual void stop() = 0;
+};
+
+}        // namespace hwcpipe

+ 192 - 0
ThirdParty/HwcPipe/gpu_profiler.h

@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include "value.h"
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace hwcpipe
+{
+// The available GPU counters. Profiler implementations will support a subset of them.
+// No enumerator has an explicit value, so the numeric values follow declaration
+// order; GpuCounterHash hashes a counter by that numeric value.
+enum class GpuCounter
+{
+	GpuCycles,
+	VertexComputeCycles,
+	FragmentCycles,
+	TilerCycles,
+
+	VertexComputeJobs,
+	FragmentJobs,
+	Pixels,
+
+	Tiles,
+	TransactionEliminations,
+	EarlyZTests,
+	EarlyZKilled,
+	LateZTests,
+	LateZKilled,
+
+	Instructions,
+	DivergedInstructions,
+
+	ShaderCycles,
+	ShaderArithmeticCycles,
+	ShaderLoadStoreCycles,
+	ShaderTextureCycles,
+
+	CacheReadLookups,
+	CacheWriteLookups,
+	ExternalMemoryReadAccesses,
+	ExternalMemoryWriteAccesses,
+	ExternalMemoryReadStalls,
+	ExternalMemoryWriteStalls,
+	ExternalMemoryReadBytes,
+	ExternalMemoryWriteBytes,
+
+	// Not a real counter: it has no entry in gpu_counter_names or gpu_counter_info.
+	// NOTE(review): presumably a one-past-the-end sentinel (number of counters) - confirm with users.
+	MaxValue
+};
+
+// Mapping from GPU counter names to enum values. Used for JSON initialization.
+// Each key is spelled exactly like its enumerator. GpuCounter::MaxValue has no
+// entry, so it cannot be selected by name.
+// Being a const namespace-scope object defined in a header, every translation
+// unit that includes this file gets its own copy of the map.
+const std::unordered_map<std::string, GpuCounter> gpu_counter_names{
+    {"GpuCycles", GpuCounter::GpuCycles},
+    {"VertexComputeCycles", GpuCounter::VertexComputeCycles},
+    {"FragmentCycles", GpuCounter::FragmentCycles},
+    {"TilerCycles", GpuCounter::TilerCycles},
+
+    {"VertexComputeJobs", GpuCounter::VertexComputeJobs},
+    {"Tiles", GpuCounter::Tiles},
+    {"TransactionEliminations", GpuCounter::TransactionEliminations},
+    {"FragmentJobs", GpuCounter::FragmentJobs},
+    {"Pixels", GpuCounter::Pixels},
+
+    {"EarlyZTests", GpuCounter::EarlyZTests},
+    {"EarlyZKilled", GpuCounter::EarlyZKilled},
+    {"LateZTests", GpuCounter::LateZTests},
+    {"LateZKilled", GpuCounter::LateZKilled},
+
+    {"Instructions", GpuCounter::Instructions},
+    {"DivergedInstructions", GpuCounter::DivergedInstructions},
+
+    {"ShaderCycles", GpuCounter::ShaderCycles},
+    {"ShaderArithmeticCycles", GpuCounter::ShaderArithmeticCycles},
+    {"ShaderLoadStoreCycles", GpuCounter::ShaderLoadStoreCycles},
+    {"ShaderTextureCycles", GpuCounter::ShaderTextureCycles},
+
+    {"CacheReadLookups", GpuCounter::CacheReadLookups},
+    {"CacheWriteLookups", GpuCounter::CacheWriteLookups},
+    {"ExternalMemoryReadAccesses", GpuCounter::ExternalMemoryReadAccesses},
+    {"ExternalMemoryWriteAccesses", GpuCounter::ExternalMemoryWriteAccesses},
+    {"ExternalMemoryReadStalls", GpuCounter::ExternalMemoryReadStalls},
+    {"ExternalMemoryWriteStalls", GpuCounter::ExternalMemoryWriteStalls},
+    {"ExternalMemoryReadBytes", GpuCounter::ExternalMemoryReadBytes},
+    {"ExternalMemoryWriteBytes", GpuCounter::ExternalMemoryWriteBytes},
+};
+
+// Hash functor for GpuCounter keys: the hash of a counter is simply its
+// numeric value converted to std::size_t.
+struct GpuCounterHash
+{
+	template <typename Counter>
+	std::size_t operator()(Counter counter) const
+	{
+		const auto numeric_value = static_cast<std::size_t>(counter);
+		return numeric_value;
+	}
+};
+
+// Human-readable information about one GPU counter; the values of gpu_counter_info.
+struct GpuCounterInfo
+{
+	// Short description of what the counter measures.
+	std::string desc;
+	// Unit of the measured value (e.g. "cycles", "B").
+	std::string unit;
+};
+
+// Mapping from each counter to its corresponding information (description and unit).
+// GpuCounter::MaxValue has no entry. Being a const namespace-scope object defined
+// in a header, every translation unit that includes this file gets its own copy.
+const std::unordered_map<GpuCounter, GpuCounterInfo, GpuCounterHash> gpu_counter_info{
+    {GpuCounter::GpuCycles, {"Number of GPU cycles", "cycles"}},
+    {GpuCounter::VertexComputeCycles, {"Number of vertex/compute cycles", "cycles"}},
+    {GpuCounter::FragmentCycles, {"Number of fragment cycles", "cycles"}},
+    {GpuCounter::TilerCycles, {"Number of tiler cycles", "cycles"}},
+
+    {GpuCounter::VertexComputeJobs, {"Number of vertex/compute jobs", "jobs"}},
+    {GpuCounter::Tiles, {"Number of physical tiles written", "tiles"}},
+    {GpuCounter::TransactionEliminations, {"Number of transaction eliminations", "tiles"}},
+    {GpuCounter::FragmentJobs, {"Number of fragment jobs", "jobs"}},
+    // The counter counts shaded pixels, so its unit is pixels (it was mislabelled "cycles").
+    {GpuCounter::Pixels, {"Number of pixels shaded", "pixels"}},
+
+    {GpuCounter::EarlyZTests, {"Early-Z tests performed", "tests"}},
+    {GpuCounter::EarlyZKilled, {"Early-Z tests resulting in a kill", "tests"}},
+    {GpuCounter::LateZTests, {"Late-Z tests performed", "tests"}},
+    {GpuCounter::LateZKilled, {"Late-Z tests resulting in a kill", "tests"}},
+
+    {GpuCounter::Instructions, {"Number of shader instructions", "instructions"}},
+    {GpuCounter::DivergedInstructions, {"Number of diverged shader instructions", "instructions"}},
+
+    {GpuCounter::ShaderCycles, {"Shader total cycles", "cycles"}},
+    {GpuCounter::ShaderArithmeticCycles, {"Shader arithmetic cycles", "cycles"}},
+    {GpuCounter::ShaderLoadStoreCycles, {"Shader load/store cycles", "cycles"}},
+    {GpuCounter::ShaderTextureCycles, {"Shader texture cycles", "cycles"}},
+
+    {GpuCounter::CacheReadLookups, {"Cache read lookups", "lookups"}},
+    {GpuCounter::CacheWriteLookups, {"Cache write lookups", "lookups"}},
+    {GpuCounter::ExternalMemoryReadAccesses, {"Reads from external memory", "accesses"}},
+    {GpuCounter::ExternalMemoryWriteAccesses, {"Writes to external memory", "accesses"}},
+    {GpuCounter::ExternalMemoryReadStalls, {"Stalls when reading from external memory", "stalls"}},
+    {GpuCounter::ExternalMemoryWriteStalls, {"Stalls when writing to external memory", "stalls"}},
+    {GpuCounter::ExternalMemoryReadBytes, {"Bytes read from external memory", "B"}},
+    {GpuCounter::ExternalMemoryWriteBytes, {"Bytes written to external memory", "B"}},
+};
+
+// A set of GPU counters, hashed with GpuCounterHash.
+using GpuCounterSet = std::unordered_set<GpuCounter, GpuCounterHash>;
+// Maps each sampled GPU counter to its measured Value.
+using GpuMeasurements = std::unordered_map<GpuCounter, Value, GpuCounterHash>;
+
+/**
+ * An interface for classes that collect GPU performance data.
+ *
+ * Every operation is pure virtual; the interface itself holds no state.
+ */
+class GpuProfiler
+{
+  public:
+	// Virtual so that an implementation can be destroyed through a GpuProfiler pointer.
+	virtual ~GpuProfiler() = default;
+
+	// Returns the enabled counters
+	virtual const GpuCounterSet &enabled_counters() const = 0;
+
+	// Returns the counters that the platform supports
+	virtual const GpuCounterSet &supported_counters() const = 0;
+
+	// Sets the enabled counters after initialization.
+	// The set is taken by value, so callers may move into it.
+	virtual void set_enabled_counters(GpuCounterSet counters) = 0;
+
+	// Starts a profiling session
+	virtual void run() = 0;
+
+	// Sample the counters. Returns a map of measurements for the counters
+	// that are both available and enabled.
+	// A profiling session must be running when sampling the counters.
+	// The map is returned by reference and therefore stays owned by the profiler.
+	// NOTE(review): presumably overwritten by the next sample() call - confirm in the implementations.
+	virtual const GpuMeasurements &sample() = 0;
+
+	// Stops the active profiling session
+	virtual void stop() = 0;
+};
+
+}        // namespace hwcpipe

+ 203 - 0
ThirdParty/HwcPipe/hwcpipe.cpp

@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "hwcpipe.h"
+#include "hwcpipe_log.h"
+
+#ifdef __linux__
+#	include "vendor/arm/pmu/pmu_profiler.h"
+#	include "vendor/arm/mali/mali_profiler.h"
+#endif
+
+#ifndef HWCPIPE_NO_JSON
+#include <json.hpp>
+using json = nlohmann::json;
+#endif
+
+#include <memory>
+
+namespace hwcpipe
+{
+#ifndef HWCPIPE_NO_JSON
+// Builds the profilers from a JSON configuration string. The optional "cpu" and
+// "gpu" members list counter names; names missing from cpu_counter_names /
+// gpu_counter_names are logged and skipped.
+HWCPipe::HWCPipe(const char *json_string)
+{
+	auto config = json::parse(json_string);
+
+	// Collect the requested CPU counters.
+	CpuCounterSet cpu_selection{};
+	auto          cpu_section = config.find("cpu");
+	if (cpu_section != config.end())
+	{
+		for (auto &entry : cpu_section->items())
+		{
+			const std::string requested = entry.value().get<std::string>();
+			auto              known     = cpu_counter_names.find(requested);
+			if (known == cpu_counter_names.end())
+			{
+				HWCPIPE_LOG("CPU counter \"%s\" not found.", requested.c_str());
+				continue;
+			}
+			cpu_selection.insert(known->second);
+		}
+	}
+
+	// Collect the requested GPU counters.
+	GpuCounterSet gpu_selection{};
+	auto          gpu_section = config.find("gpu");
+	if (gpu_section != config.end())
+	{
+		for (auto &entry : gpu_section->items())
+		{
+			const std::string requested = entry.value().get<std::string>();
+			auto              known     = gpu_counter_names.find(requested);
+			if (known == gpu_counter_names.end())
+			{
+				HWCPIPE_LOG("GPU counter \"%s\" not found.", requested.c_str());
+				continue;
+			}
+			gpu_selection.insert(known->second);
+		}
+	}
+
+	create_profilers(std::move(cpu_selection), std::move(gpu_selection));
+}
+#endif
+
+// Initializes HWCPipe with exactly the given counter sets.
+// Both sets are taken by value and moved on to create_profilers(), which
+// creates a profiler only for a non-empty set.
+HWCPipe::HWCPipe(CpuCounterSet enabled_cpu_counters, GpuCounterSet enabled_gpu_counters)
+{
+	create_profilers(std::move(enabled_cpu_counters), std::move(enabled_gpu_counters));
+}
+
+// Initializes HWCPipe with the built-in default selection of CPU and GPU counters.
+HWCPipe::HWCPipe()
+{
+	create_profilers(
+	    // Default CPU counters.
+	    CpuCounterSet{
+	        CpuCounter::Cycles,
+	        CpuCounter::Instructions,
+	        CpuCounter::CacheReferences,
+	        CpuCounter::CacheMisses,
+	        CpuCounter::BranchInstructions,
+	        CpuCounter::BranchMisses,
+	    },
+	    // Default GPU counters.
+	    GpuCounterSet{
+	        GpuCounter::GpuCycles,
+	        GpuCounter::VertexComputeCycles,
+	        GpuCounter::FragmentCycles,
+	        GpuCounter::TilerCycles,
+	        GpuCounter::CacheReadLookups,
+	        GpuCounter::CacheWriteLookups,
+	        GpuCounter::ExternalMemoryReadAccesses,
+	        GpuCounter::ExternalMemoryWriteAccesses,
+	        GpuCounter::ExternalMemoryReadStalls,
+	        GpuCounter::ExternalMemoryWriteStalls,
+	        GpuCounter::ExternalMemoryReadBytes,
+	        GpuCounter::ExternalMemoryWriteBytes,
+	    });
+}
+
+// Forwards a new counter selection to the CPU profiler.
+// Does nothing when no CPU profiler exists.
+void HWCPipe::set_enabled_cpu_counters(CpuCounterSet counters)
+{
+	if (!cpu_profiler_)
+	{
+		return;
+	}
+
+	cpu_profiler_->set_enabled_counters(std::move(counters));
+}
+
+// Forwards a new counter selection to the GPU profiler.
+// Does nothing when no GPU profiler exists.
+void HWCPipe::set_enabled_gpu_counters(GpuCounterSet counters)
+{
+	if (!gpu_profiler_)
+	{
+		return;
+	}
+
+	gpu_profiler_->set_enabled_counters(std::move(counters));
+}
+
+// Starts a profiling session on every profiler that exists (CPU first, then GPU).
+void HWCPipe::run()
+{
+	if (CpuProfiler *cpu = cpu_profiler_.get())
+	{
+		cpu->run();
+	}
+
+	if (GpuProfiler *gpu = gpu_profiler_.get())
+	{
+		gpu->run();
+	}
+}
+
+// Samples every profiler that exists (CPU first, then GPU).
+// The returned pointers refer to the maps returned by the profilers' sample();
+// a pointer stays nullptr when the corresponding profiler does not exist.
+Measurements HWCPipe::sample()
+{
+	Measurements result;
+
+	if (CpuProfiler *cpu = cpu_profiler_.get())
+	{
+		result.cpu = &cpu->sample();
+	}
+
+	if (GpuProfiler *gpu = gpu_profiler_.get())
+	{
+		result.gpu = &gpu->sample();
+	}
+
+	return result;
+}
+
+// Stops the profiling session on every profiler that exists (CPU first, then GPU).
+void HWCPipe::stop()
+{
+	if (CpuProfiler *cpu = cpu_profiler_.get())
+	{
+		cpu->stop();
+	}
+
+	if (GpuProfiler *gpu = gpu_profiler_.get())
+	{
+		gpu->stop();
+	}
+}
+
+// Creates the platform profilers for the requested counters.
+// On Linux a PmuProfiler / MaliProfiler is created for each non-empty set; on
+// any other platform nothing is created and a message is logged.
+//
+// hwcpipe_log.h (included by this file) redefines `try` as `if(true)` and
+// `catch(x)` as `if(false)`, so the handler bodies must not refer to `e`.
+void HWCPipe::create_profilers(CpuCounterSet enabled_cpu_counters, GpuCounterSet enabled_gpu_counters)
+{
+#ifdef __linux__
+	// CPU counters through the PMU profiler.
+	try
+	{
+		if (!enabled_cpu_counters.empty())
+		{
+			cpu_profiler_.reset(new PmuProfiler(enabled_cpu_counters));
+		}
+	}
+	catch (const std::runtime_error &e)
+	{
+		HWCPIPE_LOG("PMU profiler initialization failed");
+	}
+
+	// GPU counters through the Mali profiler.
+	try
+	{
+		if (!enabled_gpu_counters.empty())
+		{
+			gpu_profiler_.reset(new MaliProfiler(enabled_gpu_counters));
+		}
+	}
+	catch (const std::runtime_error &e)
+	{
+		HWCPIPE_LOG("Mali profiler initialization failed");
+	}
+#else
+	HWCPIPE_LOG("No counters available for this platform.");
+#endif
+}
+
+}        // namespace hwcpipe

+ 90 - 0
ThirdParty/HwcPipe/hwcpipe.h

@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include "cpu_profiler.h"
+#include "gpu_profiler.h"
+
+#include <functional>
+#include <memory>
+
+namespace hwcpipe
+{
+// Result of HWCPipe::sample(): non-owning pointers to the CPU and GPU
+// measurement maps. A pointer is null unless sample() filled it in.
+struct Measurements
+{
+	const CpuMeasurements *cpu = nullptr;
+	const GpuMeasurements *gpu = nullptr;
+};
+
+/**
+ * A class that collects CPU/GPU performance data.
+ *
+ * Owns at most one CpuProfiler and one GpuProfiler. Every operation is a no-op
+ * for a profiler that was not created.
+ */
+class HWCPipe
+{
+  public:
+#ifndef HWCPIPE_NO_JSON
+	// Initializes HWCPipe via a JSON configuration string
+	explicit HWCPipe(const char *json_string);
+#endif
+
+	// Initializes HWCPipe with the specified counters
+	HWCPipe(CpuCounterSet enabled_cpu_counters, GpuCounterSet enabled_gpu_counters);
+
+	// Initializes HWCPipe with a default set of counters
+	HWCPipe();
+
+	// Sets the enabled counters for the CPU profiler
+	void set_enabled_cpu_counters(CpuCounterSet counters);
+
+	// Sets the enabled counters for the GPU profiler
+	void set_enabled_gpu_counters(GpuCounterSet counters);
+
+	// Starts a profiling session
+	void run();
+
+	// Sample the counters. The function returns pointers to the CPU and GPU
+	// measurements maps, if the corresponding profiler is enabled.
+	// The entries in the maps are the counters that are both available and enabled.
+	// A profiling session must be running when sampling the counters.
+	Measurements sample();
+
+	// Stops the active profiling session
+	void stop();
+
+	// Non-owning, read-only access to the CPU profiler; nullptr when none was created.
+	// Const-qualified so it can also be called on a const HWCPipe.
+	const CpuProfiler *cpu_profiler() const
+	{
+		return cpu_profiler_.get();
+	}
+	// Non-owning, read-only access to the GPU profiler; nullptr when none was created.
+	// Const-qualified so it can also be called on a const HWCPipe.
+	const GpuProfiler *gpu_profiler() const
+	{
+		return gpu_profiler_.get();
+	}
+
+  private:
+	// Owned profilers; either may stay empty.
+	std::unique_ptr<CpuProfiler> cpu_profiler_{};
+	std::unique_ptr<GpuProfiler> gpu_profiler_{};
+
+	// Creates the platform profilers for the given counter sets.
+	void create_profilers(CpuCounterSet enabled_cpu_counters, GpuCounterSet enabled_gpu_counters);
+};
+
+}        // namespace hwcpipe

+ 59 - 0
ThirdParty/HwcPipe/hwcpipe_log.h

@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdexcept>
+
+#pragma once
+
+#define HWCPIPE_TAG "HWCPipe"
+
+#if defined(__ANDROID__)
+#	include <android/log.h>
+
+#	define HWCPIPE_LOG(...) __android_log_print(ANDROID_LOG_VERBOSE, HWCPIPE_TAG, __VA_ARGS__)
+#else
+#	define HWCPIPE_LOG(...)                              \
+		{                                                 \
+			fprintf(stdout, "%s [INFO] : ", HWCPIPE_TAG); \
+			fprintf(stdout, __VA_ARGS__);                 \
+			fprintf(stdout, "\n");                        \
+		}
+#endif
+
+namespace std
+{
+	struct runtime_error_anki
+	{
+		runtime_error_anki(::std::string s)
+        {
+            HWCPIPE_LOG("%s", s.c_str());
+			abort();
+        }
+	};
+}
+
+#define throw
+#define runtime_error runtime_error_anki
+#define try if(true)
+#define catch(x) if(false)

+ 69 - 0
ThirdParty/HwcPipe/value.h

@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+namespace hwcpipe
+{
+// One counter measurement, stored either as a long long or as a double.
+// The most recent constructor/set() call decides which of the two is active.
+class Value
+{
+  public:
+	// The integer 0.
+	Value() :
+	    is_int_{true}
+	{}
+	// An integer measurement.
+	Value(long long value) :
+	    is_int_{true},
+	    int_{value}
+	{}
+	// A floating point measurement.
+	Value(double value) :
+	    is_int_{false},
+	    double_{value}
+	{}
+
+	// Returns the active representation converted to T with static_cast.
+	template <typename T>
+	T get() const
+	{
+		if (is_int_)
+		{
+			return static_cast<T>(int_);
+		}
+		return static_cast<T>(double_);
+	}
+
+	// Stores an integer and makes it the active representation.
+	void set(long long value)
+	{
+		is_int_ = true;
+		int_    = value;
+	}
+
+	// Stores a double and makes it the active representation.
+	void set(double value)
+	{
+		is_int_ = false;
+		double_ = value;
+	}
+
+  private:
+	bool      is_int_;        // true: int_ is active, false: double_ is active
+	long long int_{0};
+	double    double_{0.0};
+};
+}        // namespace hwcpipe

+ 413 - 0
ThirdParty/HwcPipe/vendor/arm/mali/hwc.hpp

@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "hwc_names.hpp"
+
+#ifndef DOXYGEN_SKIP_THIS
+
+#	if defined(ANDROID) || defined(__ANDROID__)
+/* On Android, use _IOR_BAD/_IOW_BAD rather than _IOR/_IOW: with NDK-BUILD the
+ * latter fail to compile because _IOC_TYPECHECK is defined, not because the
+ * parameter is invalid. */
+#		define MALI_IOR(a, b, c) _IOR_BAD(a, b, c)
+#		define MALI_IOW(a, b, c) _IOW_BAD(a, b, c)
+#	else
+#		define MALI_IOR(a, b, c) _IOR(a, b, c)
+#		define MALI_IOW(a, b, c) _IOW(a, b, c)
+#	endif
+
+namespace mali_userspace
+{
+// Header that starts a UK call argument structure (see kbase_uk_gpuprops and
+// uku_version_check_args). All three members share the same storage.
+// NOTE(review): the field meanings below are taken from the identically laid
+// out kbase_uk_hwcnt_header in this file - confirm they apply here too.
+union uk_header
+{
+	uint32_t id;           // presumably: id of the UK function to call
+	uint32_t ret;          // presumably: return code written back by the call
+	uint64_t sizer;        // makes the union 64 bits wide
+};
+
+// Array sizes used by the property structures below.
+#	define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+#	define BASE_MAX_COHERENT_GROUPS 16
+
+// Core properties part of mali_base_gpu_props.
+// NOTE(review): presumably mirrors a structure of the Mali kernel driver, so
+// member order, types and padding must not be changed - confirm.
+struct mali_base_gpu_core_props
+{
+	uint32_t product_id;
+	uint16_t version_status;
+	uint16_t minor_revision;
+	uint16_t major_revision;
+	uint16_t padding;
+	uint32_t gpu_speed_mhz;
+	uint32_t gpu_freq_khz_max;
+	uint32_t gpu_freq_khz_min;
+	uint32_t log2_program_counter_size;
+	uint32_t texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+	uint64_t gpu_available_memory_size;
+};
+
+// L2 cache properties part of mali_base_gpu_props.
+// Three one-byte members followed by five bytes of explicit padding (8 bytes in total).
+struct mali_base_gpu_l2_cache_props
+{
+	uint8_t log2_line_size;
+	uint8_t log2_cache_size;
+	uint8_t num_l2_slices;
+	uint8_t padding[5];
+};
+
+// Tiler properties part of mali_base_gpu_props.
+struct mali_base_gpu_tiler_props
+{
+	uint32_t bin_size_bytes;
+	uint32_t max_active_levels;
+};
+
+// Thread properties part of mali_base_gpu_props.
+// The trailing padding array is an explicit member of the layout; keep it.
+struct mali_base_gpu_thread_props
+{
+	uint32_t max_threads;
+	uint32_t max_workgroup_size;
+	uint32_t max_barrier_size;
+	uint16_t max_registers;
+	uint8_t  max_task_queue;
+	uint8_t  max_thread_group_split;
+	uint8_t  impl_tech;
+	uint8_t  padding[7];
+};
+
+// One entry of mali_base_gpu_coherent_group_info::group.
+// NOTE(review): core_mask is presumably a bitmask with one bit per core of the group - confirm.
+struct mali_base_gpu_coherent_group
+{
+	uint64_t core_mask;
+	uint16_t num_cores;
+	uint16_t padding[3];
+};
+
+// Coherency information part of mali_base_gpu_props: counts plus a fixed array
+// of BASE_MAX_COHERENT_GROUPS group entries.
+// NOTE(review): presumably only the first num_groups entries of `group` are meaningful - confirm.
+struct mali_base_gpu_coherent_group_info
+{
+	uint32_t                     num_groups;
+	uint32_t                     num_core_groups;
+	uint32_t                     coherency;
+	uint32_t                     padding;
+	mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+// Number of entries in gpu_raw_gpu_props::js_features.
+#	define GPU_MAX_JOB_SLOTS 16
+// Raw properties part of mali_base_gpu_props.
+// NOTE(review): presumably raw GPU register values reported by the driver - confirm.
+struct gpu_raw_gpu_props
+{
+	uint64_t shader_present;
+	uint64_t tiler_present;
+	uint64_t l2_present;
+	uint64_t unused_1;
+
+	uint32_t l2_features;
+	uint32_t suspend_size;
+	uint32_t mem_features;
+	uint32_t mmu_features;
+
+	uint32_t as_present;
+
+	uint32_t js_present;
+	uint32_t js_features[GPU_MAX_JOB_SLOTS];
+	uint32_t tiler_features;
+	// Sized with a literal 3 here, not BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS (which is also 3).
+	uint32_t texture_features[3];
+
+	uint32_t gpu_id;
+
+	uint32_t thread_max_threads;
+	uint32_t thread_max_workgroup_size;
+	uint32_t thread_max_barrier_size;
+	uint32_t thread_features;
+
+	uint32_t coherency_mode;
+};
+
+// Aggregate of all GPU property groups; the payload of kbase_uk_gpuprops.
+// Member order is part of the layout and must not be changed.
+struct mali_base_gpu_props
+{
+	mali_base_gpu_core_props          core_props;
+	mali_base_gpu_l2_cache_props      l2_props;
+	uint64_t                          unused;
+	mali_base_gpu_tiler_props         tiler_props;
+	mali_base_gpu_thread_props        thread_props;
+	gpu_raw_gpu_props                 raw_props;
+	mali_base_gpu_coherent_group_info coherency_info;
+};
+
+// UK call argument carrying the GPU properties: a uk_header followed by the properties.
+// Has the `header` member that mali_ioctl() requires of its argument type.
+struct kbase_uk_gpuprops
+{
+	uk_header           header;
+	mali_base_gpu_props props;
+};
+
+// Codes for the width of a property value (U8/U16/U32/U64).
+// NOTE(review): presumably used when decoding the buffer returned by
+// KBASE_IOCTL_GET_GPUPROPS - the decoder is not part of this file; confirm.
+#	define KBASE_GPUPROP_VALUE_SIZE_U8 (0x0)
+#	define KBASE_GPUPROP_VALUE_SIZE_U16 (0x1)
+#	define KBASE_GPUPROP_VALUE_SIZE_U32 (0x2)
+#	define KBASE_GPUPROP_VALUE_SIZE_U64 (0x3)
+
+// Property identifiers. These are the `type` values of gpu_property_mapping,
+// which ties each identifier to a member of gpu_props.
+#	define KBASE_GPUPROP_PRODUCT_ID 1
+#	define KBASE_GPUPROP_MINOR_REVISION 3
+#	define KBASE_GPUPROP_MAJOR_REVISION 4
+
+#	define KBASE_GPUPROP_COHERENCY_NUM_GROUPS 61
+#	define KBASE_GPUPROP_COHERENCY_NUM_CORE_GROUPS 62
+// One identifier per coherency group, consecutive from 64 to 79.
+#	define KBASE_GPUPROP_COHERENCY_GROUP_0 64
+#	define KBASE_GPUPROP_COHERENCY_GROUP_1 65
+#	define KBASE_GPUPROP_COHERENCY_GROUP_2 66
+#	define KBASE_GPUPROP_COHERENCY_GROUP_3 67
+#	define KBASE_GPUPROP_COHERENCY_GROUP_4 68
+#	define KBASE_GPUPROP_COHERENCY_GROUP_5 69
+#	define KBASE_GPUPROP_COHERENCY_GROUP_6 70
+#	define KBASE_GPUPROP_COHERENCY_GROUP_7 71
+#	define KBASE_GPUPROP_COHERENCY_GROUP_8 72
+#	define KBASE_GPUPROP_COHERENCY_GROUP_9 73
+#	define KBASE_GPUPROP_COHERENCY_GROUP_10 74
+#	define KBASE_GPUPROP_COHERENCY_GROUP_11 75
+#	define KBASE_GPUPROP_COHERENCY_GROUP_12 76
+#	define KBASE_GPUPROP_COHERENCY_GROUP_13 77
+#	define KBASE_GPUPROP_COHERENCY_GROUP_14 78
+#	define KBASE_GPUPROP_COHERENCY_GROUP_15 79
+
+#	define KBASE_GPUPROP_L2_NUM_L2_SLICES 15
+
+// The subset of GPU properties that gpu_property_mapping maps KBASE_GPUPROP_*
+// identifiers onto.
+struct gpu_props
+{
+	uint32_t product_id;
+	uint16_t minor_revision;
+	uint16_t major_revision;
+	uint32_t num_groups;
+	uint32_t num_core_groups;
+	// One slot per KBASE_GPUPROP_COHERENCY_GROUP_0..15 identifier.
+	uint64_t core_mask[16];
+
+	uint32_t l2_slices;
+};
+
+// Table mapping each KBASE_GPUPROP_* identifier (`type`) to the byte offset and
+// byte size of the gpu_props member that receives its value.
+// PROP(name, member) expands to {KBASE_GPUPROP_<name>, offsetof(gpu_props, member),
+// sizeof(gpu_props::member)} and is undefined again right after the table.
+// The table ends with an all-zero sentinel entry.
+static const struct
+{
+	uint32_t type;
+	size_t   offset;
+	int      size;
+} gpu_property_mapping[] = {
+#	define PROP(name, member)                                        \
+		{                                                             \
+			KBASE_GPUPROP_##name, offsetof(struct gpu_props, member), \
+			    sizeof(((struct gpu_props *) 0)->member)              \
+		}
+    PROP(PRODUCT_ID, product_id),
+    PROP(MINOR_REVISION, minor_revision),
+    PROP(MAJOR_REVISION, major_revision),
+    PROP(COHERENCY_NUM_GROUPS, num_groups),
+    PROP(COHERENCY_NUM_CORE_GROUPS, num_core_groups),
+    PROP(COHERENCY_GROUP_0, core_mask[0]),
+    PROP(COHERENCY_GROUP_1, core_mask[1]),
+    PROP(COHERENCY_GROUP_2, core_mask[2]),
+    PROP(COHERENCY_GROUP_3, core_mask[3]),
+    PROP(COHERENCY_GROUP_4, core_mask[4]),
+    PROP(COHERENCY_GROUP_5, core_mask[5]),
+    PROP(COHERENCY_GROUP_6, core_mask[6]),
+    PROP(COHERENCY_GROUP_7, core_mask[7]),
+    PROP(COHERENCY_GROUP_8, core_mask[8]),
+    PROP(COHERENCY_GROUP_9, core_mask[9]),
+    PROP(COHERENCY_GROUP_10, core_mask[10]),
+    PROP(COHERENCY_GROUP_11, core_mask[11]),
+    PROP(COHERENCY_GROUP_12, core_mask[12]),
+    PROP(COHERENCY_GROUP_13, core_mask[13]),
+    PROP(COHERENCY_GROUP_14, core_mask[14]),
+    PROP(COHERENCY_GROUP_15, core_mask[15]),
+
+    PROP(L2_NUM_L2_SLICES, l2_slices),
+#	undef PROP
+    // End-of-table sentinel.
+    {0, 0, 0}};
+
+// Argument type of the KBASE_HWCNT_READER_GET_BUFFER / _PUT_BUFFER ioctl commands.
+// All members are zero-initialized.
+// NOTE(review): presumably describes one sample buffer (when it was taken, by
+// which event, and its index) - confirm against the reader implementation.
+struct kbase_hwcnt_reader_metadata
+{
+	uint64_t timestamp  = 0;
+	uint32_t event_id   = 0;
+	uint32_t buffer_idx = 0;
+};
+
+// Argument structures and command ids for the hardware counter ioctl interfaces.
+// NOTE(review): this is an unnamed namespace inside a header, so every
+// translation unit that includes the header gets its own distinct copy of
+// these types and constants.
+namespace
+{
+/** Message header */
+union kbase_uk_hwcnt_header
+{
+	/* 32-bit number identifying the UK function to be called. */
+	uint32_t id;
+	/* The int return code returned by the called UK function. */
+	uint32_t ret;
+	/* Used to ensure 64-bit alignment of this union. Do not remove. */
+	uint64_t sizer;
+};
+
+/** IOCTL parameters to check version */
+struct kbase_uk_hwcnt_reader_version_check_args
+{
+	union kbase_uk_hwcnt_header header;
+
+	uint16_t major;
+	uint16_t minor;
+	uint8_t  padding[4];
+};
+
+/** Pointer wrapper whose size is 64 bits regardless of the native pointer width. */
+union kbase_pointer
+{
+	void *   value;
+	uint32_t compat_value;
+	uint64_t sizer;
+};
+
+/** Argument of KBASE_IOCTL_GET_GPUPROPS: a buffer pointer plus its size. */
+struct kbase_ioctl_get_gpuprops
+{
+	kbase_pointer buffer;
+	uint32_t      size;
+	uint32_t      flags;
+};
+
+/** Argument of KBASE_IOCTL_VERSION_CHECK */
+struct kbase_ioctl_version_check
+{
+	uint16_t major;
+	uint16_t minor;
+};
+
+/** Argument of KBASE_IOCTL_SET_FLAGS */
+struct kbase_ioctl_set_flags
+{
+	uint32_t create_flags;
+};
+
+/** Argument of KBASE_IOCTL_HWCNT_READER_SETUP (same IN fields as kbase_uk_hwcnt_reader_setup). */
+struct kbase_ioctl_hwcnt_reader_setup
+{
+	uint32_t buffer_count;
+	uint32_t jm_bm;
+	uint32_t shader_bm;
+	uint32_t tiler_bm;
+	uint32_t mmu_l2_bm;
+};
+
+/* ioctl command numbers of type KBASE_IOCTL_TYPE.
+ * Only GET_GPUPROPS goes through the MALI_IOW wrapper; the other three use
+ * _IOWR/_IOW directly. */
+#	define KBASE_IOCTL_TYPE 0x80
+#	define KBASE_IOCTL_GET_GPUPROPS MALI_IOW(KBASE_IOCTL_TYPE, 3, struct mali_userspace::kbase_ioctl_get_gpuprops)
+#	define KBASE_IOCTL_VERSION_CHECK _IOWR(KBASE_IOCTL_TYPE, 0, struct mali_userspace::kbase_ioctl_version_check)
+#	define KBASE_IOCTL_SET_FLAGS _IOW(KBASE_IOCTL_TYPE, 1, struct mali_userspace::kbase_ioctl_set_flags)
+#	define KBASE_IOCTL_HWCNT_READER_SETUP _IOW(KBASE_IOCTL_TYPE, 8, struct mali_userspace::kbase_ioctl_hwcnt_reader_setup)
+
+/** IOCTL parameters to set flags */
+struct kbase_uk_hwcnt_reader_set_flags
+{
+	union kbase_uk_hwcnt_header header;
+
+	uint32_t create_flags;
+	uint32_t padding;
+};
+
+/** IOCTL parameters to configure reader */
+struct kbase_uk_hwcnt_reader_setup
+{
+	union kbase_uk_hwcnt_header header;
+
+	/* IN */
+	uint32_t buffer_count;
+	uint32_t jm_bm;
+	uint32_t shader_bm;
+	uint32_t tiler_bm;
+	uint32_t mmu_l2_bm;
+
+	/* OUT */
+	int32_t fd;
+};
+
+/* NOTE(review): presumably the hwcnt reader API version this code expects - confirm at the use site. */
+static const uint32_t HWCNT_READER_API = 1;
+
+/** Version check arguments that start with a uk_header. */
+struct uku_version_check_args
+{
+	uk_header header;
+	uint16_t  major;
+	uint16_t  minor;
+	uint8_t   padding[4];
+};
+
+/** Function ids and ioctl command numbers. */
+enum
+{
+	UKP_FUNC_ID_CHECK_VERSION = 0,
+	/* Related to mali0 ioctl interface */
+	LINUX_UK_BASE_MAGIC              = 0x80, /* ioctl type used by mali_ioctl() */
+	BASE_CONTEXT_CREATE_KERNEL_FLAGS = 0x2,
+	KBASE_FUNC_HWCNT_UK_FUNC_ID      = 512,
+	KBASE_FUNC_GPU_PROPS_REG_DUMP    = KBASE_FUNC_HWCNT_UK_FUNC_ID + 14,
+	KBASE_FUNC_HWCNT_READER_SETUP    = KBASE_FUNC_HWCNT_UK_FUNC_ID + 36,
+	KBASE_FUNC_HWCNT_DUMP            = KBASE_FUNC_HWCNT_UK_FUNC_ID + 11,
+	KBASE_FUNC_HWCNT_CLEAR           = KBASE_FUNC_HWCNT_UK_FUNC_ID + 12,
+	KBASE_FUNC_SET_FLAGS             = KBASE_FUNC_HWCNT_UK_FUNC_ID + 18,
+
+	/* The ids of ioctl commands for the reader interface */
+	KBASE_HWCNT_READER                 = 0xBE,
+	KBASE_HWCNT_READER_GET_HWVER       = MALI_IOR(KBASE_HWCNT_READER, 0x00, uint32_t),
+	KBASE_HWCNT_READER_GET_BUFFER_SIZE = MALI_IOR(KBASE_HWCNT_READER, 0x01, uint32_t),
+	KBASE_HWCNT_READER_DUMP            = MALI_IOW(KBASE_HWCNT_READER, 0x10, uint32_t),
+	KBASE_HWCNT_READER_CLEAR           = MALI_IOW(KBASE_HWCNT_READER, 0x11, uint32_t),
+	KBASE_HWCNT_READER_GET_BUFFER      = MALI_IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata),
+	KBASE_HWCNT_READER_PUT_BUFFER      = MALI_IOW(KBASE_HWCNT_READER, 0x21, struct kbase_hwcnt_reader_metadata),
+	KBASE_HWCNT_READER_SET_INTERVAL    = MALI_IOW(KBASE_HWCNT_READER, 0x30, uint32_t),
+	KBASE_HWCNT_READER_ENABLE_EVENT    = MALI_IOW(KBASE_HWCNT_READER, 0x40, uint32_t),
+	KBASE_HWCNT_READER_DISABLE_EVENT   = MALI_IOW(KBASE_HWCNT_READER, 0x41, uint32_t),
+	KBASE_HWCNT_READER_GET_API_VERSION = MALI_IOW(KBASE_HWCNT_READER, 0xFF, uint32_t)
+};
+
+enum
+{
+	PIPE_DESCRIPTOR_IN,  /**< The index of a pipe's input descriptor. */
+	PIPE_DESCRIPTOR_OUT, /**< The index of a pipe's output descriptor. */
+
+	PIPE_DESCRIPTOR_COUNT /**< The number of descriptors forming a pipe. */
+};
+
+enum
+{
+	POLL_DESCRIPTOR_SIGNAL,       /**< The index of the signal descriptor in poll fds array. */
+	POLL_DESCRIPTOR_HWCNT_READER, /**< The index of the hwcnt reader descriptor in poll fds array. */
+
+	POLL_DESCRIPTOR_COUNT /**< The number of descriptors poll is waiting for. */
+};
+
+/** Write a single byte into the pipe to interrupt the reader thread */
+typedef char poll_data_t;
+}        // namespace
+
+/**
+ * Issues a read/write ioctl of type LINUX_UK_BASE_MAGIC on `fd`, using
+ * `arg.header.id` as the command number and sizeof(T) as the argument size.
+ *
+ * T must have a `header` member with `id` and `ret` fields.
+ *
+ * Returns 0 on success, -1 when ioctl() itself fails or when the header's
+ * `ret` field is non-zero after the call.
+ */
+template <typename T>
+static inline int mali_ioctl(int fd, T &arg)
+{
+	// Build the command before the call: in the header unions of this file `id`
+	// and `ret` share storage, so `id` is not meaningful once `ret` is written.
+	const int request = _IOC(_IOC_READ | _IOC_WRITE, LINUX_UK_BASE_MAGIC, arg.header.id, sizeof(T));
+
+	// Short-circuit: `ret` is only inspected when the ioctl call itself succeeded.
+	const bool failed = (ioctl(fd, request, &arg) != 0) || (arg.header.ret != 0);
+	return failed ? -1 : 0;
+}
+}        // namespace mali_userspace
+
+#endif /* DOXYGEN_SKIP_THIS */

+ 4966 - 0
ThirdParty/HwcPipe/vendor/arm/mali/hwc_names.hpp

@@ -0,0 +1,4966 @@
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#ifndef DOXYGEN_SKIP_THIS
+
+namespace mali_userspace
+{
+/** Identifies one of the four counter blocks named in the tables of this file. */
+enum MaliCounterBlockName
+{
+	MALI_NAME_BLOCK_JM     = 0, /**< Job Manager block. */
+	MALI_NAME_BLOCK_TILER  = 1, /**< Tiler block. */
+	MALI_NAME_BLOCK_SHADER = 2, /**< Shader Core block. */
+	MALI_NAME_BLOCK_MMU    = 3, /**< L2 and MMU block. */
+};
+
+/** Number of name entries each counter block occupies in the name tables. */
+enum { MALI_NAME_BLOCK_SIZE = 64 };
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counter names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+    static const char * const hardware_counters_mali_t60x[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "T60x_MESSAGES_SENT",
+        "T60x_MESSAGES_RECEIVED",
+        "T60x_GPU_ACTIVE",
+        "T60x_IRQ_ACTIVE",
+        "T60x_JS0_JOBS",
+        "T60x_JS0_TASKS",
+        "T60x_JS0_ACTIVE",
+        "",
+        "T60x_JS0_WAIT_READ",
+        "T60x_JS0_WAIT_ISSUE",
+        "T60x_JS0_WAIT_DEPEND",
+        "T60x_JS0_WAIT_FINISH",
+        "T60x_JS1_JOBS",
+        "T60x_JS1_TASKS",
+        "T60x_JS1_ACTIVE",
+        "",
+        "T60x_JS1_WAIT_READ",
+        "T60x_JS1_WAIT_ISSUE",
+        "T60x_JS1_WAIT_DEPEND",
+        "T60x_JS1_WAIT_FINISH",
+        "T60x_JS2_JOBS",
+        "T60x_JS2_TASKS",
+        "T60x_JS2_ACTIVE",
+        "",
+        "T60x_JS2_WAIT_READ",
+        "T60x_JS2_WAIT_ISSUE",
+        "T60x_JS2_WAIT_DEPEND",
+        "T60x_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "T60x_TI_JOBS_PROCESSED",
+        "T60x_TI_TRIANGLES",
+        "T60x_TI_QUADS",
+        "T60x_TI_POLYGONS",
+        "T60x_TI_POINTS",
+        "T60x_TI_LINES",
+        "T60x_TI_VCACHE_HIT",
+        "T60x_TI_VCACHE_MISS",
+        "T60x_TI_FRONT_FACING",
+        "T60x_TI_BACK_FACING",
+        "T60x_TI_PRIM_VISIBLE",
+        "T60x_TI_PRIM_CULLED",
+        "T60x_TI_PRIM_CLIPPED",
+        "T60x_TI_LEVEL0",
+        "T60x_TI_LEVEL1",
+        "T60x_TI_LEVEL2",
+        "T60x_TI_LEVEL3",
+        "T60x_TI_LEVEL4",
+        "T60x_TI_LEVEL5",
+        "T60x_TI_LEVEL6",
+        "T60x_TI_LEVEL7",
+        "T60x_TI_COMMAND_1",
+        "T60x_TI_COMMAND_2",
+        "T60x_TI_COMMAND_3",
+        "T60x_TI_COMMAND_4",
+        "T60x_TI_COMMAND_4_7",
+        "T60x_TI_COMMAND_8_15",
+        "T60x_TI_COMMAND_16_63",
+        "T60x_TI_COMMAND_64",
+        "T60x_TI_COMPRESS_IN",
+        "T60x_TI_COMPRESS_OUT",
+        "T60x_TI_COMPRESS_FLUSH",
+        "T60x_TI_TIMESTAMPS",
+        "T60x_TI_PCACHE_HIT",
+        "T60x_TI_PCACHE_MISS",
+        "T60x_TI_PCACHE_LINE",
+        "T60x_TI_PCACHE_STALL",
+        "T60x_TI_WRBUF_HIT",
+        "T60x_TI_WRBUF_MISS",
+        "T60x_TI_WRBUF_LINE",
+        "T60x_TI_WRBUF_PARTIAL",
+        "T60x_TI_WRBUF_STALL",
+        "T60x_TI_ACTIVE",
+        "T60x_TI_LOADING_DESC",
+        "T60x_TI_INDEX_WAIT",
+        "T60x_TI_INDEX_RANGE_WAIT",
+        "T60x_TI_VERTEX_WAIT",
+        "T60x_TI_PCACHE_WAIT",
+        "T60x_TI_WRBUF_WAIT",
+        "T60x_TI_BUS_READ",
+        "T60x_TI_BUS_WRITE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T60x_TI_UTLB_STALL",
+        "T60x_TI_UTLB_REPLAY_MISS",
+        "T60x_TI_UTLB_REPLAY_FULL",
+        "T60x_TI_UTLB_NEW_MISS",
+        "T60x_TI_UTLB_HIT",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "T60x_FRAG_ACTIVE",
+        "T60x_FRAG_PRIMITIVES",
+        "T60x_FRAG_PRIMITIVES_DROPPED",
+        "T60x_FRAG_CYCLES_DESC",
+        "T60x_FRAG_CYCLES_PLR",
+        "T60x_FRAG_CYCLES_VERT",
+        "T60x_FRAG_CYCLES_TRISETUP",
+        "T60x_FRAG_CYCLES_RAST",
+        "T60x_FRAG_THREADS",
+        "T60x_FRAG_DUMMY_THREADS",
+        "T60x_FRAG_QUADS_RAST",
+        "T60x_FRAG_QUADS_EZS_TEST",
+        "T60x_FRAG_QUADS_EZS_KILLED",
+        "T60x_FRAG_THREADS_LZS_TEST",
+        "T60x_FRAG_THREADS_LZS_KILLED",
+        "T60x_FRAG_CYCLES_NO_TILE",
+        "T60x_FRAG_NUM_TILES",
+        "T60x_FRAG_TRANS_ELIM",
+        "T60x_COMPUTE_ACTIVE",
+        "T60x_COMPUTE_TASKS",
+        "T60x_COMPUTE_THREADS",
+        "T60x_COMPUTE_CYCLES_DESC",
+        "T60x_TRIPIPE_ACTIVE",
+        "T60x_ARITH_WORDS",
+        "T60x_ARITH_CYCLES_REG",
+        "T60x_ARITH_CYCLES_L0",
+        "T60x_ARITH_FRAG_DEPEND",
+        "T60x_LS_WORDS",
+        "T60x_LS_ISSUES",
+        "T60x_LS_RESTARTS",
+        "T60x_LS_REISSUES_MISS",
+        "T60x_LS_REISSUES_VD",
+        "T60x_LS_REISSUE_ATTRIB_MISS",
+        "T60x_LS_NO_WB",
+        "T60x_TEX_WORDS",
+        "T60x_TEX_BUBBLES",
+        "T60x_TEX_WORDS_L0",
+        "T60x_TEX_WORDS_DESC",
+        "T60x_TEX_ISSUES",
+        "T60x_TEX_RECIRC_FMISS",
+        "T60x_TEX_RECIRC_DESC",
+        "T60x_TEX_RECIRC_MULTI",
+        "T60x_TEX_RECIRC_PMISS",
+        "T60x_TEX_RECIRC_CONF",
+        "T60x_LSC_READ_HITS",
+        "T60x_LSC_READ_MISSES",
+        "T60x_LSC_WRITE_HITS",
+        "T60x_LSC_WRITE_MISSES",
+        "T60x_LSC_ATOMIC_HITS",
+        "T60x_LSC_ATOMIC_MISSES",
+        "T60x_LSC_LINE_FETCHES",
+        "T60x_LSC_DIRTY_LINE",
+        "T60x_LSC_SNOOPS",
+        "T60x_AXI_TLB_STALL",
+        "T60x_AXI_TLB_MISS",
+        "T60x_AXI_TLB_TRANSACTION",
+        "T60x_LS_TLB_MISS",
+        "T60x_LS_TLB_HIT",
+        "T60x_AXI_BEATS_READ",
+        "T60x_AXI_BEATS_WRITTEN",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "T60x_MMU_HIT",
+        "T60x_MMU_NEW_MISS",
+        "T60x_MMU_REPLAY_FULL",
+        "T60x_MMU_REPLAY_MISS",
+        "T60x_MMU_TABLE_WALK",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T60x_UTLB_HIT",
+        "T60x_UTLB_NEW_MISS",
+        "T60x_UTLB_REPLAY_FULL",
+        "T60x_UTLB_REPLAY_MISS",
+        "T60x_UTLB_STALL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T60x_L2_EXT_WRITE_BEATS",
+        "T60x_L2_EXT_READ_BEATS",
+        "T60x_L2_ANY_LOOKUP",
+        "T60x_L2_READ_LOOKUP",
+        "T60x_L2_SREAD_LOOKUP",
+        "T60x_L2_READ_REPLAY",
+        "T60x_L2_READ_SNOOP",
+        "T60x_L2_READ_HIT",
+        "T60x_L2_CLEAN_MISS",
+        "T60x_L2_WRITE_LOOKUP",
+        "T60x_L2_SWRITE_LOOKUP",
+        "T60x_L2_WRITE_REPLAY",
+        "T60x_L2_WRITE_SNOOP",
+        "T60x_L2_WRITE_HIT",
+        "T60x_L2_EXT_READ_FULL",
+        "T60x_L2_EXT_READ_HALF",
+        "T60x_L2_EXT_WRITE_FULL",
+        "T60x_L2_EXT_WRITE_HALF",
+        "T60x_L2_EXT_READ",
+        "T60x_L2_EXT_READ_LINE",
+        "T60x_L2_EXT_WRITE",
+        "T60x_L2_EXT_WRITE_LINE",
+        "T60x_L2_EXT_WRITE_SMALL",
+        "T60x_L2_EXT_BARRIER",
+        "T60x_L2_EXT_AR_STALL",
+        "T60x_L2_EXT_R_BUF_FULL",
+        "T60x_L2_EXT_RD_BUF_FULL",
+        "T60x_L2_EXT_R_RAW",
+        "T60x_L2_EXT_W_STALL",
+        "T60x_L2_EXT_W_BUF_FULL",
+        "T60x_L2_EXT_R_W_HAZARD",
+        "T60x_L2_TAG_HAZARD",
+        "T60x_L2_SNOOP_FULL",
+        "T60x_L2_REPLAY_FULL"};
+
+    static const char * const hardware_counters_mali_t62x[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "T62x_MESSAGES_SENT",
+        "T62x_MESSAGES_RECEIVED",
+        "T62x_GPU_ACTIVE",
+        "T62x_IRQ_ACTIVE",
+        "T62x_JS0_JOBS",
+        "T62x_JS0_TASKS",
+        "T62x_JS0_ACTIVE",
+        "",
+        "T62x_JS0_WAIT_READ",
+        "T62x_JS0_WAIT_ISSUE",
+        "T62x_JS0_WAIT_DEPEND",
+        "T62x_JS0_WAIT_FINISH",
+        "T62x_JS1_JOBS",
+        "T62x_JS1_TASKS",
+        "T62x_JS1_ACTIVE",
+        "",
+        "T62x_JS1_WAIT_READ",
+        "T62x_JS1_WAIT_ISSUE",
+        "T62x_JS1_WAIT_DEPEND",
+        "T62x_JS1_WAIT_FINISH",
+        "T62x_JS2_JOBS",
+        "T62x_JS2_TASKS",
+        "T62x_JS2_ACTIVE",
+        "",
+        "T62x_JS2_WAIT_READ",
+        "T62x_JS2_WAIT_ISSUE",
+        "T62x_JS2_WAIT_DEPEND",
+        "T62x_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "T62x_TI_JOBS_PROCESSED",
+        "T62x_TI_TRIANGLES",
+        "T62x_TI_QUADS",
+        "T62x_TI_POLYGONS",
+        "T62x_TI_POINTS",
+        "T62x_TI_LINES",
+        "T62x_TI_VCACHE_HIT",
+        "T62x_TI_VCACHE_MISS",
+        "T62x_TI_FRONT_FACING",
+        "T62x_TI_BACK_FACING",
+        "T62x_TI_PRIM_VISIBLE",
+        "T62x_TI_PRIM_CULLED",
+        "T62x_TI_PRIM_CLIPPED",
+        "T62x_TI_LEVEL0",
+        "T62x_TI_LEVEL1",
+        "T62x_TI_LEVEL2",
+        "T62x_TI_LEVEL3",
+        "T62x_TI_LEVEL4",
+        "T62x_TI_LEVEL5",
+        "T62x_TI_LEVEL6",
+        "T62x_TI_LEVEL7",
+        "T62x_TI_COMMAND_1",
+        "T62x_TI_COMMAND_2",
+        "T62x_TI_COMMAND_3",
+        "T62x_TI_COMMAND_4",
+        "T62x_TI_COMMAND_5_7",
+        "T62x_TI_COMMAND_8_15",
+        "T62x_TI_COMMAND_16_63",
+        "T62x_TI_COMMAND_64",
+        "T62x_TI_COMPRESS_IN",
+        "T62x_TI_COMPRESS_OUT",
+        "T62x_TI_COMPRESS_FLUSH",
+        "T62x_TI_TIMESTAMPS",
+        "T62x_TI_PCACHE_HIT",
+        "T62x_TI_PCACHE_MISS",
+        "T62x_TI_PCACHE_LINE",
+        "T62x_TI_PCACHE_STALL",
+        "T62x_TI_WRBUF_HIT",
+        "T62x_TI_WRBUF_MISS",
+        "T62x_TI_WRBUF_LINE",
+        "T62x_TI_WRBUF_PARTIAL",
+        "T62x_TI_WRBUF_STALL",
+        "T62x_TI_ACTIVE",
+        "T62x_TI_LOADING_DESC",
+        "T62x_TI_INDEX_WAIT",
+        "T62x_TI_INDEX_RANGE_WAIT",
+        "T62x_TI_VERTEX_WAIT",
+        "T62x_TI_PCACHE_WAIT",
+        "T62x_TI_WRBUF_WAIT",
+        "T62x_TI_BUS_READ",
+        "T62x_TI_BUS_WRITE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T62x_TI_UTLB_STALL",
+        "T62x_TI_UTLB_REPLAY_MISS",
+        "T62x_TI_UTLB_REPLAY_FULL",
+        "T62x_TI_UTLB_NEW_MISS",
+        "T62x_TI_UTLB_HIT",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "T62x_SHADER_CORE_ACTIVE",
+        "T62x_FRAG_ACTIVE",
+        "T62x_FRAG_PRIMITIVES",
+        "T62x_FRAG_PRIMITIVES_DROPPED",
+        "T62x_FRAG_CYCLES_DESC",
+        "T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+        "T62x_FRAG_CYCLES_VERT",
+        "T62x_FRAG_CYCLES_TRISETUP",
+        "T62x_FRAG_CYCLES_EZS_ACTIVE",
+        "T62x_FRAG_THREADS",
+        "T62x_FRAG_DUMMY_THREADS",
+        "T62x_FRAG_QUADS_RAST",
+        "T62x_FRAG_QUADS_EZS_TEST",
+        "T62x_FRAG_QUADS_EZS_KILLED",
+        "T62x_FRAG_THREADS_LZS_TEST",
+        "T62x_FRAG_THREADS_LZS_KILLED",
+        "T62x_FRAG_CYCLES_NO_TILE",
+        "T62x_FRAG_NUM_TILES",
+        "T62x_FRAG_TRANS_ELIM",
+        "T62x_COMPUTE_ACTIVE",
+        "T62x_COMPUTE_TASKS",
+        "T62x_COMPUTE_THREADS",
+        "T62x_COMPUTE_CYCLES_DESC",
+        "T62x_TRIPIPE_ACTIVE",
+        "T62x_ARITH_WORDS",
+        "T62x_ARITH_CYCLES_REG",
+        "T62x_ARITH_CYCLES_L0",
+        "T62x_ARITH_FRAG_DEPEND",
+        "T62x_LS_WORDS",
+        "T62x_LS_ISSUES",
+        "T62x_LS_RESTARTS",
+        "T62x_LS_REISSUES_MISS",
+        "T62x_LS_REISSUES_VD",
+        "T62x_LS_REISSUE_ATTRIB_MISS",
+        "T62x_LS_NO_WB",
+        "T62x_TEX_WORDS",
+        "T62x_TEX_BUBBLES",
+        "T62x_TEX_WORDS_L0",
+        "T62x_TEX_WORDS_DESC",
+        "T62x_TEX_ISSUES",
+        "T62x_TEX_RECIRC_FMISS",
+        "T62x_TEX_RECIRC_DESC",
+        "T62x_TEX_RECIRC_MULTI",
+        "T62x_TEX_RECIRC_PMISS",
+        "T62x_TEX_RECIRC_CONF",
+        "T62x_LSC_READ_HITS",
+        "T62x_LSC_READ_MISSES",
+        "T62x_LSC_WRITE_HITS",
+        "T62x_LSC_WRITE_MISSES",
+        "T62x_LSC_ATOMIC_HITS",
+        "T62x_LSC_ATOMIC_MISSES",
+        "T62x_LSC_LINE_FETCHES",
+        "T62x_LSC_DIRTY_LINE",
+        "T62x_LSC_SNOOPS",
+        "T62x_AXI_TLB_STALL",
+        "T62x_AXI_TLB_MISS",
+        "T62x_AXI_TLB_TRANSACTION",
+        "T62x_LS_TLB_MISS",
+        "T62x_LS_TLB_HIT",
+        "T62x_AXI_BEATS_READ",
+        "T62x_AXI_BEATS_WRITTEN",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "T62x_MMU_HIT",
+        "T62x_MMU_NEW_MISS",
+        "T62x_MMU_REPLAY_FULL",
+        "T62x_MMU_REPLAY_MISS",
+        "T62x_MMU_TABLE_WALK",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T62x_UTLB_HIT",
+        "T62x_UTLB_NEW_MISS",
+        "T62x_UTLB_REPLAY_FULL",
+        "T62x_UTLB_REPLAY_MISS",
+        "T62x_UTLB_STALL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T62x_L2_EXT_WRITE_BEATS",
+        "T62x_L2_EXT_READ_BEATS",
+        "T62x_L2_ANY_LOOKUP",
+        "T62x_L2_READ_LOOKUP",
+        "T62x_L2_SREAD_LOOKUP",
+        "T62x_L2_READ_REPLAY",
+        "T62x_L2_READ_SNOOP",
+        "T62x_L2_READ_HIT",
+        "T62x_L2_CLEAN_MISS",
+        "T62x_L2_WRITE_LOOKUP",
+        "T62x_L2_SWRITE_LOOKUP",
+        "T62x_L2_WRITE_REPLAY",
+        "T62x_L2_WRITE_SNOOP",
+        "T62x_L2_WRITE_HIT",
+        "T62x_L2_EXT_READ_FULL",
+        "T62x_L2_EXT_READ_HALF",
+        "T62x_L2_EXT_WRITE_FULL",
+        "T62x_L2_EXT_WRITE_HALF",
+        "T62x_L2_EXT_READ",
+        "T62x_L2_EXT_READ_LINE",
+        "T62x_L2_EXT_WRITE",
+        "T62x_L2_EXT_WRITE_LINE",
+        "T62x_L2_EXT_WRITE_SMALL",
+        "T62x_L2_EXT_BARRIER",
+        "T62x_L2_EXT_AR_STALL",
+        "T62x_L2_EXT_R_BUF_FULL",
+        "T62x_L2_EXT_RD_BUF_FULL",
+        "T62x_L2_EXT_R_RAW",
+        "T62x_L2_EXT_W_STALL",
+        "T62x_L2_EXT_W_BUF_FULL",
+        "T62x_L2_EXT_R_W_HAZARD",
+        "T62x_L2_TAG_HAZARD",
+        "T62x_L2_SNOOP_FULL",
+        "T62x_L2_REPLAY_FULL"};
+
+    static const char * const hardware_counters_mali_t72x[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "T72x_GPU_ACTIVE",
+        "T72x_IRQ_ACTIVE",
+        "T72x_JS0_JOBS",
+        "T72x_JS0_TASKS",
+        "T72x_JS0_ACTIVE",
+        "T72x_JS1_JOBS",
+        "T72x_JS1_TASKS",
+        "T72x_JS1_ACTIVE",
+        "T72x_JS2_JOBS",
+        "T72x_JS2_TASKS",
+        "T72x_JS2_ACTIVE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "T72x_TI_JOBS_PROCESSED",
+        "T72x_TI_TRIANGLES",
+        "T72x_TI_QUADS",
+        "T72x_TI_POLYGONS",
+        "T72x_TI_POINTS",
+        "T72x_TI_LINES",
+        "T72x_TI_FRONT_FACING",
+        "T72x_TI_BACK_FACING",
+        "T72x_TI_PRIM_VISIBLE",
+        "T72x_TI_PRIM_CULLED",
+        "T72x_TI_PRIM_CLIPPED",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T72x_TI_ACTIVE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "T72x_FRAG_ACTIVE",
+        "T72x_FRAG_PRIMITIVES",
+        "T72x_FRAG_PRIMITIVES_DROPPED",
+        "T72x_FRAG_THREADS",
+        "T72x_FRAG_DUMMY_THREADS",
+        "T72x_FRAG_QUADS_RAST",
+        "T72x_FRAG_QUADS_EZS_TEST",
+        "T72x_FRAG_QUADS_EZS_KILLED",
+        "T72x_FRAG_THREADS_LZS_TEST",
+        "T72x_FRAG_THREADS_LZS_KILLED",
+        "T72x_FRAG_CYCLES_NO_TILE",
+        "T72x_FRAG_NUM_TILES",
+        "T72x_FRAG_TRANS_ELIM",
+        "T72x_COMPUTE_ACTIVE",
+        "T72x_COMPUTE_TASKS",
+        "T72x_COMPUTE_THREADS",
+        "T72x_TRIPIPE_ACTIVE",
+        "T72x_ARITH_WORDS",
+        "T72x_ARITH_CYCLES_REG",
+        "T72x_LS_WORDS",
+        "T72x_LS_ISSUES",
+        "T72x_LS_RESTARTS",
+        "T72x_LS_REISSUES_MISS",
+        "T72x_TEX_WORDS",
+        "T72x_TEX_BUBBLES",
+        "T72x_TEX_ISSUES",
+        "T72x_LSC_READ_HITS",
+        "T72x_LSC_READ_MISSES",
+        "T72x_LSC_WRITE_HITS",
+        "T72x_LSC_WRITE_MISSES",
+        "T72x_LSC_ATOMIC_HITS",
+        "T72x_LSC_ATOMIC_MISSES",
+        "T72x_LSC_LINE_FETCHES",
+        "T72x_LSC_DIRTY_LINE",
+        "T72x_LSC_SNOOPS",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "T72x_L2_EXT_WRITE_BEAT",
+        "T72x_L2_EXT_READ_BEAT",
+        "T72x_L2_READ_SNOOP",
+        "T72x_L2_READ_HIT",
+        "T72x_L2_WRITE_SNOOP",
+        "T72x_L2_WRITE_HIT",
+        "T72x_L2_EXT_WRITE_SMALL",
+        "T72x_L2_EXT_BARRIER",
+        "T72x_L2_EXT_AR_STALL",
+        "T72x_L2_EXT_W_STALL",
+        "T72x_L2_SNOOP_FULL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        ""};
+
+    static const char * const hardware_counters_mali_t76x[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "T76x_MESSAGES_SENT",
+        "T76x_MESSAGES_RECEIVED",
+        "T76x_GPU_ACTIVE",
+        "T76x_IRQ_ACTIVE",
+        "T76x_JS0_JOBS",
+        "T76x_JS0_TASKS",
+        "T76x_JS0_ACTIVE",
+        "",
+        "T76x_JS0_WAIT_READ",
+        "T76x_JS0_WAIT_ISSUE",
+        "T76x_JS0_WAIT_DEPEND",
+        "T76x_JS0_WAIT_FINISH",
+        "T76x_JS1_JOBS",
+        "T76x_JS1_TASKS",
+        "T76x_JS1_ACTIVE",
+        "",
+        "T76x_JS1_WAIT_READ",
+        "T76x_JS1_WAIT_ISSUE",
+        "T76x_JS1_WAIT_DEPEND",
+        "T76x_JS1_WAIT_FINISH",
+        "T76x_JS2_JOBS",
+        "T76x_JS2_TASKS",
+        "T76x_JS2_ACTIVE",
+        "",
+        "T76x_JS2_WAIT_READ",
+        "T76x_JS2_WAIT_ISSUE",
+        "T76x_JS2_WAIT_DEPEND",
+        "T76x_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "T76x_TI_JOBS_PROCESSED",
+        "T76x_TI_TRIANGLES",
+        "T76x_TI_QUADS",
+        "T76x_TI_POLYGONS",
+        "T76x_TI_POINTS",
+        "T76x_TI_LINES",
+        "T76x_TI_VCACHE_HIT",
+        "T76x_TI_VCACHE_MISS",
+        "T76x_TI_FRONT_FACING",
+        "T76x_TI_BACK_FACING",
+        "T76x_TI_PRIM_VISIBLE",
+        "T76x_TI_PRIM_CULLED",
+        "T76x_TI_PRIM_CLIPPED",
+        "T76x_TI_LEVEL0",
+        "T76x_TI_LEVEL1",
+        "T76x_TI_LEVEL2",
+        "T76x_TI_LEVEL3",
+        "T76x_TI_LEVEL4",
+        "T76x_TI_LEVEL5",
+        "T76x_TI_LEVEL6",
+        "T76x_TI_LEVEL7",
+        "T76x_TI_COMMAND_1",
+        "T76x_TI_COMMAND_2",
+        "T76x_TI_COMMAND_3",
+        "T76x_TI_COMMAND_4",
+        "T76x_TI_COMMAND_5_7",
+        "T76x_TI_COMMAND_8_15",
+        "T76x_TI_COMMAND_16_63",
+        "T76x_TI_COMMAND_64",
+        "T76x_TI_COMPRESS_IN",
+        "T76x_TI_COMPRESS_OUT",
+        "T76x_TI_COMPRESS_FLUSH",
+        "T76x_TI_TIMESTAMPS",
+        "T76x_TI_PCACHE_HIT",
+        "T76x_TI_PCACHE_MISS",
+        "T76x_TI_PCACHE_LINE",
+        "T76x_TI_PCACHE_STALL",
+        "T76x_TI_WRBUF_HIT",
+        "T76x_TI_WRBUF_MISS",
+        "T76x_TI_WRBUF_LINE",
+        "T76x_TI_WRBUF_PARTIAL",
+        "T76x_TI_WRBUF_STALL",
+        "T76x_TI_ACTIVE",
+        "T76x_TI_LOADING_DESC",
+        "T76x_TI_INDEX_WAIT",
+        "T76x_TI_INDEX_RANGE_WAIT",
+        "T76x_TI_VERTEX_WAIT",
+        "T76x_TI_PCACHE_WAIT",
+        "T76x_TI_WRBUF_WAIT",
+        "T76x_TI_BUS_READ",
+        "T76x_TI_BUS_WRITE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T76x_TI_UTLB_HIT",
+        "T76x_TI_UTLB_NEW_MISS",
+        "T76x_TI_UTLB_REPLAY_FULL",
+        "T76x_TI_UTLB_REPLAY_MISS",
+        "T76x_TI_UTLB_STALL",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "T76x_FRAG_ACTIVE",
+        "T76x_FRAG_PRIMITIVES",
+        "T76x_FRAG_PRIMITIVES_DROPPED",
+        "T76x_FRAG_CYCLES_DESC",
+        "T76x_FRAG_CYCLES_PLR",
+        "T76x_FRAG_CYCLES_VERT",
+        "T76x_FRAG_CYCLES_TRISETUP",
+        "T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+        "T76x_FRAG_THREADS",
+        "T76x_FRAG_DUMMY_THREADS",
+        "T76x_FRAG_QUADS_RAST",
+        "T76x_FRAG_QUADS_EZS_TEST",
+        "T76x_FRAG_QUADS_EZS_KILLED",
+        "T76x_FRAG_THREADS_LZS_TEST",
+        "T76x_FRAG_THREADS_LZS_KILLED",
+        "T76x_FRAG_CYCLES_NO_TILE",
+        "T76x_FRAG_NUM_TILES",
+        "T76x_FRAG_TRANS_ELIM",
+        "T76x_COMPUTE_ACTIVE",
+        "T76x_COMPUTE_TASKS",
+        "T76x_COMPUTE_THREADS",
+        "T76x_COMPUTE_CYCLES_DESC",
+        "T76x_TRIPIPE_ACTIVE",
+        "T76x_ARITH_WORDS",
+        "T76x_ARITH_CYCLES_REG",
+        "T76x_ARITH_CYCLES_L0",
+        "T76x_ARITH_FRAG_DEPEND",
+        "T76x_LS_WORDS",
+        "T76x_LS_ISSUES",
+        "T76x_LS_REISSUE_ATTR",
+        "T76x_LS_REISSUES_VARY",
+        "T76x_LS_VARY_RV_MISS",
+        "T76x_LS_VARY_RV_HIT",
+        "T76x_LS_NO_UNPARK",
+        "T76x_TEX_WORDS",
+        "T76x_TEX_BUBBLES",
+        "T76x_TEX_WORDS_L0",
+        "T76x_TEX_WORDS_DESC",
+        "T76x_TEX_ISSUES",
+        "T76x_TEX_RECIRC_FMISS",
+        "T76x_TEX_RECIRC_DESC",
+        "T76x_TEX_RECIRC_MULTI",
+        "T76x_TEX_RECIRC_PMISS",
+        "T76x_TEX_RECIRC_CONF",
+        "T76x_LSC_READ_HITS",
+        "T76x_LSC_READ_OP",
+        "T76x_LSC_WRITE_HITS",
+        "T76x_LSC_WRITE_OP",
+        "T76x_LSC_ATOMIC_HITS",
+        "T76x_LSC_ATOMIC_OP",
+        "T76x_LSC_LINE_FETCHES",
+        "T76x_LSC_DIRTY_LINE",
+        "T76x_LSC_SNOOPS",
+        "T76x_AXI_TLB_STALL",
+        "T76x_AXI_TLB_MISS",
+        "T76x_AXI_TLB_TRANSACTION",
+        "T76x_LS_TLB_MISS",
+        "T76x_LS_TLB_HIT",
+        "T76x_AXI_BEATS_READ",
+        "T76x_AXI_BEATS_WRITTEN",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "T76x_MMU_HIT",
+        "T76x_MMU_NEW_MISS",
+        "T76x_MMU_REPLAY_FULL",
+        "T76x_MMU_REPLAY_MISS",
+        "T76x_MMU_TABLE_WALK",
+        "T76x_MMU_REQUESTS",
+        "",
+        "",
+        "T76x_UTLB_HIT",
+        "T76x_UTLB_NEW_MISS",
+        "T76x_UTLB_REPLAY_FULL",
+        "T76x_UTLB_REPLAY_MISS",
+        "T76x_UTLB_STALL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T76x_L2_EXT_WRITE_BEATS",
+        "T76x_L2_EXT_READ_BEATS",
+        "T76x_L2_ANY_LOOKUP",
+        "T76x_L2_READ_LOOKUP",
+        "T76x_L2_SREAD_LOOKUP",
+        "T76x_L2_READ_REPLAY",
+        "T76x_L2_READ_SNOOP",
+        "T76x_L2_READ_HIT",
+        "T76x_L2_CLEAN_MISS",
+        "T76x_L2_WRITE_LOOKUP",
+        "T76x_L2_SWRITE_LOOKUP",
+        "T76x_L2_WRITE_REPLAY",
+        "T76x_L2_WRITE_SNOOP",
+        "T76x_L2_WRITE_HIT",
+        "T76x_L2_EXT_READ_FULL",
+        "",
+        "T76x_L2_EXT_WRITE_FULL",
+        "T76x_L2_EXT_R_W_HAZARD",
+        "T76x_L2_EXT_READ",
+        "T76x_L2_EXT_READ_LINE",
+        "T76x_L2_EXT_WRITE",
+        "T76x_L2_EXT_WRITE_LINE",
+        "T76x_L2_EXT_WRITE_SMALL",
+        "T76x_L2_EXT_BARRIER",
+        "T76x_L2_EXT_AR_STALL",
+        "T76x_L2_EXT_R_BUF_FULL",
+        "T76x_L2_EXT_RD_BUF_FULL",
+        "T76x_L2_EXT_R_RAW",
+        "T76x_L2_EXT_W_STALL",
+        "T76x_L2_EXT_W_BUF_FULL",
+        "T76x_L2_EXT_R_BUF_FULL",
+        "T76x_L2_TAG_HAZARD",
+        "T76x_L2_SNOOP_FULL",
+        "T76x_L2_REPLAY_FULL"};
+
+    static const char * const hardware_counters_mali_t82x[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "T82x_MESSAGES_SENT",
+        "T82x_MESSAGES_RECEIVED",
+        "T82x_GPU_ACTIVE",
+        "T82x_IRQ_ACTIVE",
+        "T82x_JS0_JOBS",
+        "T82x_JS0_TASKS",
+        "T82x_JS0_ACTIVE",
+        "",
+        "T82x_JS0_WAIT_READ",
+        "T82x_JS0_WAIT_ISSUE",
+        "T82x_JS0_WAIT_DEPEND",
+        "T82x_JS0_WAIT_FINISH",
+        "T82x_JS1_JOBS",
+        "T82x_JS1_TASKS",
+        "T82x_JS1_ACTIVE",
+        "",
+        "T82x_JS1_WAIT_READ",
+        "T82x_JS1_WAIT_ISSUE",
+        "T82x_JS1_WAIT_DEPEND",
+        "T82x_JS1_WAIT_FINISH",
+        "T82x_JS2_JOBS",
+        "T82x_JS2_TASKS",
+        "T82x_JS2_ACTIVE",
+        "",
+        "T82x_JS2_WAIT_READ",
+        "T82x_JS2_WAIT_ISSUE",
+        "T82x_JS2_WAIT_DEPEND",
+        "T82x_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "T82x_TI_JOBS_PROCESSED",
+        "T82x_TI_TRIANGLES",
+        "T82x_TI_QUADS",
+        "T82x_TI_POLYGONS",
+        "T82x_TI_POINTS",
+        "T82x_TI_LINES",
+        "T82x_TI_FRONT_FACING",
+        "T82x_TI_BACK_FACING",
+        "T82x_TI_PRIM_VISIBLE",
+        "T82x_TI_PRIM_CULLED",
+        "T82x_TI_PRIM_CLIPPED",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T82x_TI_ACTIVE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "T82x_FRAG_ACTIVE",
+        "T82x_FRAG_PRIMITIVES",
+        "T82x_FRAG_PRIMITIVES_DROPPED",
+        "T82x_FRAG_CYCLES_DESC",
+        "T82x_FRAG_CYCLES_ZS_REORDER_ACTIVE",
+        "T82x_FRAG_CYCLES_VERT",
+        "T82x_FRAG_CYCLES_TRISETUP",
+        "T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+        "T82x_FRAG_THREADS",
+        "T82x_FRAG_DUMMY_THREADS",
+        "T82x_FRAG_QUADS_RAST",
+        "T82x_FRAG_QUADS_EZS_TEST",
+        "T82x_FRAG_QUADS_EZS_KILLED",
+        "T82x_FRAG_THREADS_LZS_TEST",
+        "T82x_FRAG_THREADS_LZS_KILLED",
+        "T82x_FRAG_CYCLES_NO_TILE",
+        "T82x_FRAG_NUM_TILES",
+        "T82x_FRAG_TRANS_ELIM",
+        "T82x_COMPUTE_ACTIVE",
+        "T82x_COMPUTE_TASKS",
+        "T82x_COMPUTE_THREADS",
+        "T82x_COMPUTE_CYCLES_DESC",
+        "T82x_TRIPIPE_ACTIVE",
+        "T82x_ARITH_WORDS",
+        "T82x_ARITH_CYCLES_REG",
+        "T82x_ARITH_CYCLES_L0",
+        "T82x_ARITH_FRAG_DEPEND",
+        "T82x_LS_WORDS",
+        "T82x_LS_ISSUES",
+        "T82x_LS_REISSUE_ATTR",
+        "T82x_LS_REISSUES_VARY",
+        "T82x_LS_VARY_RV_MISS",
+        "T82x_LS_VARY_RV_HIT",
+        "T82x_LS_NO_UNPARK",
+        "T82x_TEX_WORDS",
+        "T82x_TEX_BUBBLES",
+        "T82x_TEX_WORDS_L0",
+        "T82x_TEX_WORDS_DESC",
+        "T82x_TEX_ISSUES",
+        "T82x_TEX_RECIRC_FMISS",
+        "T82x_TEX_RECIRC_DESC",
+        "T82x_TEX_RECIRC_MULTI",
+        "T82x_TEX_RECIRC_PMISS",
+        "T82x_TEX_RECIRC_CONF",
+        "T82x_LSC_READ_HITS",
+        "T82x_LSC_READ_OP",
+        "T82x_LSC_WRITE_HITS",
+        "T82x_LSC_WRITE_OP",
+        "T82x_LSC_ATOMIC_HITS",
+        "T82x_LSC_ATOMIC_OP",
+        "T82x_LSC_LINE_FETCHES",
+        "T82x_LSC_DIRTY_LINE",
+        "T82x_LSC_SNOOPS",
+        "T82x_AXI_TLB_STALL",
+        "T82x_AXI_TLB_MISS",
+        "T82x_AXI_TLB_TRANSACTION",
+        "T82x_LS_TLB_MISS",
+        "T82x_LS_TLB_HIT",
+        "T82x_AXI_BEATS_READ",
+        "T82x_AXI_BEATS_WRITTEN",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "T82x_MMU_HIT",
+        "T82x_MMU_NEW_MISS",
+        "T82x_MMU_REPLAY_FULL",
+        "T82x_MMU_REPLAY_MISS",
+        "T82x_MMU_TABLE_WALK",
+        "T82x_MMU_REQUESTS",
+        "",
+        "",
+        "T82x_UTLB_HIT",
+        "T82x_UTLB_NEW_MISS",
+        "T82x_UTLB_REPLAY_FULL",
+        "T82x_UTLB_REPLAY_MISS",
+        "T82x_UTLB_STALL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T82x_L2_EXT_WRITE_BEATS",
+        "T82x_L2_EXT_READ_BEATS",
+        "T82x_L2_ANY_LOOKUP",
+        "T82x_L2_READ_LOOKUP",
+        "T82x_L2_SREAD_LOOKUP",
+        "T82x_L2_READ_REPLAY",
+        "T82x_L2_READ_SNOOP",
+        "T82x_L2_READ_HIT",
+        "T82x_L2_CLEAN_MISS",
+        "T82x_L2_WRITE_LOOKUP",
+        "T82x_L2_SWRITE_LOOKUP",
+        "T82x_L2_WRITE_REPLAY",
+        "T82x_L2_WRITE_SNOOP",
+        "T82x_L2_WRITE_HIT",
+        "T82x_L2_EXT_READ_FULL",
+        "",
+        "T82x_L2_EXT_WRITE_FULL",
+        "T82x_L2_EXT_R_W_HAZARD",
+        "T82x_L2_EXT_READ",
+        "T82x_L2_EXT_READ_LINE",
+        "T82x_L2_EXT_WRITE",
+        "T82x_L2_EXT_WRITE_LINE",
+        "T82x_L2_EXT_WRITE_SMALL",
+        "T82x_L2_EXT_BARRIER",
+        "T82x_L2_EXT_AR_STALL",
+        "T82x_L2_EXT_R_BUF_FULL",
+        "T82x_L2_EXT_RD_BUF_FULL",
+        "T82x_L2_EXT_R_RAW",
+        "T82x_L2_EXT_W_STALL",
+        "T82x_L2_EXT_W_BUF_FULL",
+        "T82x_L2_EXT_R_BUF_FULL",
+        "T82x_L2_TAG_HAZARD",
+        "T82x_L2_SNOOP_FULL",
+        "T82x_L2_REPLAY_FULL"};
+
+    static const char * const hardware_counters_mali_t83x[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "T83x_MESSAGES_SENT",
+        "T83x_MESSAGES_RECEIVED",
+        "T83x_GPU_ACTIVE",
+        "T83x_IRQ_ACTIVE",
+        "T83x_JS0_JOBS",
+        "T83x_JS0_TASKS",
+        "T83x_JS0_ACTIVE",
+        "",
+        "T83x_JS0_WAIT_READ",
+        "T83x_JS0_WAIT_ISSUE",
+        "T83x_JS0_WAIT_DEPEND",
+        "T83x_JS0_WAIT_FINISH",
+        "T83x_JS1_JOBS",
+        "T83x_JS1_TASKS",
+        "T83x_JS1_ACTIVE",
+        "",
+        "T83x_JS1_WAIT_READ",
+        "T83x_JS1_WAIT_ISSUE",
+        "T83x_JS1_WAIT_DEPEND",
+        "T83x_JS1_WAIT_FINISH",
+        "T83x_JS2_JOBS",
+        "T83x_JS2_TASKS",
+        "T83x_JS2_ACTIVE",
+        "",
+        "T83x_JS2_WAIT_READ",
+        "T83x_JS2_WAIT_ISSUE",
+        "T83x_JS2_WAIT_DEPEND",
+        "T83x_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "T83x_TI_JOBS_PROCESSED",
+        "T83x_TI_TRIANGLES",
+        "T83x_TI_QUADS",
+        "T83x_TI_POLYGONS",
+        "T83x_TI_POINTS",
+        "T83x_TI_LINES",
+        "T83x_TI_FRONT_FACING",
+        "T83x_TI_BACK_FACING",
+        "T83x_TI_PRIM_VISIBLE",
+        "T83x_TI_PRIM_CULLED",
+        "T83x_TI_PRIM_CLIPPED",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T83x_TI_ACTIVE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "T83x_FRAG_ACTIVE",
+        "T83x_FRAG_PRIMITIVES",
+        "T83x_FRAG_PRIMITIVES_DROPPED",
+        "T83x_FRAG_CYCLES_DESC",
+        "T83x_FRAG_CYCLES_ZS_REORDER_ACTIVE",
+        "T83x_FRAG_CYCLES_VERT",
+        "T83x_FRAG_CYCLES_TRISETUP",
+        "T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+        "T83x_FRAG_THREADS",
+        "T83x_FRAG_DUMMY_THREADS",
+        "T83x_FRAG_QUADS_RAST",
+        "T83x_FRAG_QUADS_EZS_TEST",
+        "T83x_FRAG_QUADS_EZS_KILLED",
+        "T83x_FRAG_THREADS_LZS_TEST",
+        "T83x_FRAG_THREADS_LZS_KILLED",
+        "T83x_FRAG_CYCLES_NO_TILE",
+        "T83x_FRAG_NUM_TILES",
+        "T83x_FRAG_TRANS_ELIM",
+        "T83x_COMPUTE_ACTIVE",
+        "T83x_COMPUTE_TASKS",
+        "T83x_COMPUTE_THREADS",
+        "T83x_COMPUTE_CYCLES_DESC",
+        "T83x_TRIPIPE_ACTIVE",
+        "T83x_ARITH_WORDS",
+        "T83x_ARITH_CYCLES_REG",
+        "T83x_ARITH_CYCLES_L0",
+        "T83x_ARITH_FRAG_DEPEND",
+        "T83x_LS_WORDS",
+        "T83x_LS_ISSUES",
+        "T83x_LS_REISSUE_ATTR",
+        "T83x_LS_REISSUES_VARY",
+        "T83x_LS_VARY_RV_MISS",
+        "T83x_LS_VARY_RV_HIT",
+        "T83x_LS_NO_UNPARK",
+        "T83x_TEX_WORDS",
+        "T83x_TEX_BUBBLES",
+        "T83x_TEX_WORDS_L0",
+        "T83x_TEX_WORDS_DESC",
+        "T83x_TEX_ISSUES",
+        "T83x_TEX_RECIRC_FMISS",
+        "T83x_TEX_RECIRC_DESC",
+        "T83x_TEX_RECIRC_MULTI",
+        "T83x_TEX_RECIRC_PMISS",
+        "T83x_TEX_RECIRC_CONF",
+        "T83x_LSC_READ_HITS",
+        "T83x_LSC_READ_OP",
+        "T83x_LSC_WRITE_HITS",
+        "T83x_LSC_WRITE_OP",
+        "T83x_LSC_ATOMIC_HITS",
+        "T83x_LSC_ATOMIC_OP",
+        "T83x_LSC_LINE_FETCHES",
+        "T83x_LSC_DIRTY_LINE",
+        "T83x_LSC_SNOOPS",
+        "T83x_AXI_TLB_STALL",
+        "T83x_AXI_TLB_MISS",
+        "T83x_AXI_TLB_TRANSACTION",
+        "T83x_LS_TLB_MISS",
+        "T83x_LS_TLB_HIT",
+        "T83x_AXI_BEATS_READ",
+        "T83x_AXI_BEATS_WRITTEN",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "T83x_MMU_HIT",
+        "T83x_MMU_NEW_MISS",
+        "T83x_MMU_REPLAY_FULL",
+        "T83x_MMU_REPLAY_MISS",
+        "T83x_MMU_TABLE_WALK",
+        "T83x_MMU_REQUESTS",
+        "",
+        "",
+        "T83x_UTLB_HIT",
+        "T83x_UTLB_NEW_MISS",
+        "T83x_UTLB_REPLAY_FULL",
+        "T83x_UTLB_REPLAY_MISS",
+        "T83x_UTLB_STALL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T83x_L2_EXT_WRITE_BEATS",
+        "T83x_L2_EXT_READ_BEATS",
+        "T83x_L2_ANY_LOOKUP",
+        "T83x_L2_READ_LOOKUP",
+        "T83x_L2_SREAD_LOOKUP",
+        "T83x_L2_READ_REPLAY",
+        "T83x_L2_READ_SNOOP",
+        "T83x_L2_READ_HIT",
+        "T83x_L2_CLEAN_MISS",
+        "T83x_L2_WRITE_LOOKUP",
+        "T83x_L2_SWRITE_LOOKUP",
+        "T83x_L2_WRITE_REPLAY",
+        "T83x_L2_WRITE_SNOOP",
+        "T83x_L2_WRITE_HIT",
+        "T83x_L2_EXT_READ_FULL",
+        "",
+        "T83x_L2_EXT_WRITE_FULL",
+        "T83x_L2_EXT_R_W_HAZARD",
+        "T83x_L2_EXT_READ",
+        "T83x_L2_EXT_READ_LINE",
+        "T83x_L2_EXT_WRITE",
+        "T83x_L2_EXT_WRITE_LINE",
+        "T83x_L2_EXT_WRITE_SMALL",
+        "T83x_L2_EXT_BARRIER",
+        "T83x_L2_EXT_AR_STALL",
+        "T83x_L2_EXT_R_BUF_FULL",
+        "T83x_L2_EXT_RD_BUF_FULL",
+        "T83x_L2_EXT_R_RAW",
+        "T83x_L2_EXT_W_STALL",
+        "T83x_L2_EXT_W_BUF_FULL",
+        "T83x_L2_EXT_R_BUF_FULL",
+        "T83x_L2_TAG_HAZARD",
+        "T83x_L2_SNOOP_FULL",
+        "T83x_L2_REPLAY_FULL"};
+
+    static const char * const hardware_counters_mali_t86x[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "T86x_MESSAGES_SENT",
+        "T86x_MESSAGES_RECEIVED",
+        "T86x_GPU_ACTIVE",
+        "T86x_IRQ_ACTIVE",
+        "T86x_JS0_JOBS",
+        "T86x_JS0_TASKS",
+        "T86x_JS0_ACTIVE",
+        "",
+        "T86x_JS0_WAIT_READ",
+        "T86x_JS0_WAIT_ISSUE",
+        "T86x_JS0_WAIT_DEPEND",
+        "T86x_JS0_WAIT_FINISH",
+        "T86x_JS1_JOBS",
+        "T86x_JS1_TASKS",
+        "T86x_JS1_ACTIVE",
+        "",
+        "T86x_JS1_WAIT_READ",
+        "T86x_JS1_WAIT_ISSUE",
+        "T86x_JS1_WAIT_DEPEND",
+        "T86x_JS1_WAIT_FINISH",
+        "T86x_JS2_JOBS",
+        "T86x_JS2_TASKS",
+        "T86x_JS2_ACTIVE",
+        "",
+        "T86x_JS2_WAIT_READ",
+        "T86x_JS2_WAIT_ISSUE",
+        "T86x_JS2_WAIT_DEPEND",
+        "T86x_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "T86x_TI_JOBS_PROCESSED",
+        "T86x_TI_TRIANGLES",
+        "T86x_TI_QUADS",
+        "T86x_TI_POLYGONS",
+        "T86x_TI_POINTS",
+        "T86x_TI_LINES",
+        "T86x_TI_VCACHE_HIT",
+        "T86x_TI_VCACHE_MISS",
+        "T86x_TI_FRONT_FACING",
+        "T86x_TI_BACK_FACING",
+        "T86x_TI_PRIM_VISIBLE",
+        "T86x_TI_PRIM_CULLED",
+        "T86x_TI_PRIM_CLIPPED",
+        "T86x_TI_LEVEL0",
+        "T86x_TI_LEVEL1",
+        "T86x_TI_LEVEL2",
+        "T86x_TI_LEVEL3",
+        "T86x_TI_LEVEL4",
+        "T86x_TI_LEVEL5",
+        "T86x_TI_LEVEL6",
+        "T86x_TI_LEVEL7",
+        "T86x_TI_COMMAND_1",
+        "T86x_TI_COMMAND_2",
+        "T86x_TI_COMMAND_3",
+        "T86x_TI_COMMAND_4",
+        "T86x_TI_COMMAND_5_7",
+        "T86x_TI_COMMAND_8_15",
+        "T86x_TI_COMMAND_16_63",
+        "T86x_TI_COMMAND_64",
+        "T86x_TI_COMPRESS_IN",
+        "T86x_TI_COMPRESS_OUT",
+        "T86x_TI_COMPRESS_FLUSH",
+        "T86x_TI_TIMESTAMPS",
+        "T86x_TI_PCACHE_HIT",
+        "T86x_TI_PCACHE_MISS",
+        "T86x_TI_PCACHE_LINE",
+        "T86x_TI_PCACHE_STALL",
+        "T86x_TI_WRBUF_HIT",
+        "T86x_TI_WRBUF_MISS",
+        "T86x_TI_WRBUF_LINE",
+        "T86x_TI_WRBUF_PARTIAL",
+        "T86x_TI_WRBUF_STALL",
+        "T86x_TI_ACTIVE",
+        "T86x_TI_LOADING_DESC",
+        "T86x_TI_INDEX_WAIT",
+        "T86x_TI_INDEX_RANGE_WAIT",
+        "T86x_TI_VERTEX_WAIT",
+        "T86x_TI_PCACHE_WAIT",
+        "T86x_TI_WRBUF_WAIT",
+        "T86x_TI_BUS_READ",
+        "T86x_TI_BUS_WRITE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T86x_TI_UTLB_HIT",
+        "T86x_TI_UTLB_NEW_MISS",
+        "T86x_TI_UTLB_REPLAY_FULL",
+        "T86x_TI_UTLB_REPLAY_MISS",
+        "T86x_TI_UTLB_STALL",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "T86x_FRAG_ACTIVE",
+        "T86x_FRAG_PRIMITIVES",
+        "T86x_FRAG_PRIMITIVES_DROPPED",
+        "T86x_FRAG_CYCLES_DESC",
+        "T86x_FRAG_CYCLES_ZS_REORDER_ACTIVE",
+        "T86x_FRAG_CYCLES_VERT",
+        "T86x_FRAG_CYCLES_TRISETUP",
+        "T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+        "T86x_FRAG_THREADS",
+        "T86x_FRAG_DUMMY_THREADS",
+        "T86x_FRAG_QUADS_RAST",
+        "T86x_FRAG_QUADS_EZS_TEST",
+        "T86x_FRAG_QUADS_EZS_KILLED",
+        "T86x_FRAG_THREADS_LZS_TEST",
+        "T86x_FRAG_THREADS_LZS_KILLED",
+        "T86x_FRAG_CYCLES_NO_TILE",
+        "T86x_FRAG_NUM_TILES",
+        "T86x_FRAG_TRANS_ELIM",
+        "T86x_COMPUTE_ACTIVE",
+        "T86x_COMPUTE_TASKS",
+        "T86x_COMPUTE_THREADS",
+        "T86x_COMPUTE_CYCLES_DESC",
+        "T86x_TRIPIPE_ACTIVE",
+        "T86x_ARITH_WORDS",
+        "T86x_ARITH_CYCLES_REG",
+        "T86x_ARITH_CYCLES_L0",
+        "T86x_ARITH_FRAG_DEPEND",
+        "T86x_LS_WORDS",
+        "T86x_LS_ISSUES",
+        "T86x_LS_REISSUE_ATTR",
+        "T86x_LS_REISSUES_VARY",
+        "T86x_LS_VARY_RV_MISS",
+        "T86x_LS_VARY_RV_HIT",
+        "T86x_LS_NO_UNPARK",
+        "T86x_TEX_WORDS",
+        "T86x_TEX_BUBBLES",
+        "T86x_TEX_WORDS_L0",
+        "T86x_TEX_WORDS_DESC",
+        "T86x_TEX_ISSUES",
+        "T86x_TEX_RECIRC_FMISS",
+        "T86x_TEX_RECIRC_DESC",
+        "T86x_TEX_RECIRC_MULTI",
+        "T86x_TEX_RECIRC_PMISS",
+        "T86x_TEX_RECIRC_CONF",
+        "T86x_LSC_READ_HITS",
+        "T86x_LSC_READ_OP",
+        "T86x_LSC_WRITE_HITS",
+        "T86x_LSC_WRITE_OP",
+        "T86x_LSC_ATOMIC_HITS",
+        "T86x_LSC_ATOMIC_OP",
+        "T86x_LSC_LINE_FETCHES",
+        "T86x_LSC_DIRTY_LINE",
+        "T86x_LSC_SNOOPS",
+        "T86x_AXI_TLB_STALL",
+        "T86x_AXI_TLB_MISS",
+        "T86x_AXI_TLB_TRANSACTION",
+        "T86x_LS_TLB_MISS",
+        "T86x_LS_TLB_HIT",
+        "T86x_AXI_BEATS_READ",
+        "T86x_AXI_BEATS_WRITTEN",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "T86x_MMU_HIT",
+        "T86x_MMU_NEW_MISS",
+        "T86x_MMU_REPLAY_FULL",
+        "T86x_MMU_REPLAY_MISS",
+        "T86x_MMU_TABLE_WALK",
+        "T86x_MMU_REQUESTS",
+        "",
+        "",
+        "T86x_UTLB_HIT",
+        "T86x_UTLB_NEW_MISS",
+        "T86x_UTLB_REPLAY_FULL",
+        "T86x_UTLB_REPLAY_MISS",
+        "T86x_UTLB_STALL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T86x_L2_EXT_WRITE_BEATS",
+        "T86x_L2_EXT_READ_BEATS",
+        "T86x_L2_ANY_LOOKUP",
+        "T86x_L2_READ_LOOKUP",
+        "T86x_L2_SREAD_LOOKUP",
+        "T86x_L2_READ_REPLAY",
+        "T86x_L2_READ_SNOOP",
+        "T86x_L2_READ_HIT",
+        "T86x_L2_CLEAN_MISS",
+        "T86x_L2_WRITE_LOOKUP",
+        "T86x_L2_SWRITE_LOOKUP",
+        "T86x_L2_WRITE_REPLAY",
+        "T86x_L2_WRITE_SNOOP",
+        "T86x_L2_WRITE_HIT",
+        "T86x_L2_EXT_READ_FULL",
+        "",
+        "T86x_L2_EXT_WRITE_FULL",
+        "T86x_L2_EXT_R_W_HAZARD",
+        "T86x_L2_EXT_READ",
+        "T86x_L2_EXT_READ_LINE",
+        "T86x_L2_EXT_WRITE",
+        "T86x_L2_EXT_WRITE_LINE",
+        "T86x_L2_EXT_WRITE_SMALL",
+        "T86x_L2_EXT_BARRIER",
+        "T86x_L2_EXT_AR_STALL",
+        "T86x_L2_EXT_R_BUF_FULL",
+        "T86x_L2_EXT_RD_BUF_FULL",
+        "T86x_L2_EXT_R_RAW",
+        "T86x_L2_EXT_W_STALL",
+        "T86x_L2_EXT_W_BUF_FULL",
+        "T86x_L2_EXT_R_BUF_FULL",
+        "T86x_L2_TAG_HAZARD",
+        "T86x_L2_SNOOP_FULL",
+        "T86x_L2_REPLAY_FULL"};
+
+    static const char * const hardware_counters_mali_t88x[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "T88x_MESSAGES_SENT",
+        "T88x_MESSAGES_RECEIVED",
+        "T88x_GPU_ACTIVE",
+        "T88x_IRQ_ACTIVE",
+        "T88x_JS0_JOBS",
+        "T88x_JS0_TASKS",
+        "T88x_JS0_ACTIVE",
+        "",
+        "T88x_JS0_WAIT_READ",
+        "T88x_JS0_WAIT_ISSUE",
+        "T88x_JS0_WAIT_DEPEND",
+        "T88x_JS0_WAIT_FINISH",
+        "T88x_JS1_JOBS",
+        "T88x_JS1_TASKS",
+        "T88x_JS1_ACTIVE",
+        "",
+        "T88x_JS1_WAIT_READ",
+        "T88x_JS1_WAIT_ISSUE",
+        "T88x_JS1_WAIT_DEPEND",
+        "T88x_JS1_WAIT_FINISH",
+        "T88x_JS2_JOBS",
+        "T88x_JS2_TASKS",
+        "T88x_JS2_ACTIVE",
+        "",
+        "T88x_JS2_WAIT_READ",
+        "T88x_JS2_WAIT_ISSUE",
+        "T88x_JS2_WAIT_DEPEND",
+        "T88x_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "T88x_TI_JOBS_PROCESSED",
+        "T88x_TI_TRIANGLES",
+        "T88x_TI_QUADS",
+        "T88x_TI_POLYGONS",
+        "T88x_TI_POINTS",
+        "T88x_TI_LINES",
+        "T88x_TI_VCACHE_HIT",
+        "T88x_TI_VCACHE_MISS",
+        "T88x_TI_FRONT_FACING",
+        "T88x_TI_BACK_FACING",
+        "T88x_TI_PRIM_VISIBLE",
+        "T88x_TI_PRIM_CULLED",
+        "T88x_TI_PRIM_CLIPPED",
+        "T88x_TI_LEVEL0",
+        "T88x_TI_LEVEL1",
+        "T88x_TI_LEVEL2",
+        "T88x_TI_LEVEL3",
+        "T88x_TI_LEVEL4",
+        "T88x_TI_LEVEL5",
+        "T88x_TI_LEVEL6",
+        "T88x_TI_LEVEL7",
+        "T88x_TI_COMMAND_1",
+        "T88x_TI_COMMAND_2",
+        "T88x_TI_COMMAND_3",
+        "T88x_TI_COMMAND_4",
+        "T88x_TI_COMMAND_5_7",
+        "T88x_TI_COMMAND_8_15",
+        "T88x_TI_COMMAND_16_63",
+        "T88x_TI_COMMAND_64",
+        "T88x_TI_COMPRESS_IN",
+        "T88x_TI_COMPRESS_OUT",
+        "T88x_TI_COMPRESS_FLUSH",
+        "T88x_TI_TIMESTAMPS",
+        "T88x_TI_PCACHE_HIT",
+        "T88x_TI_PCACHE_MISS",
+        "T88x_TI_PCACHE_LINE",
+        "T88x_TI_PCACHE_STALL",
+        "T88x_TI_WRBUF_HIT",
+        "T88x_TI_WRBUF_MISS",
+        "T88x_TI_WRBUF_LINE",
+        "T88x_TI_WRBUF_PARTIAL",
+        "T88x_TI_WRBUF_STALL",
+        "T88x_TI_ACTIVE",
+        "T88x_TI_LOADING_DESC",
+        "T88x_TI_INDEX_WAIT",
+        "T88x_TI_INDEX_RANGE_WAIT",
+        "T88x_TI_VERTEX_WAIT",
+        "T88x_TI_PCACHE_WAIT",
+        "T88x_TI_WRBUF_WAIT",
+        "T88x_TI_BUS_READ",
+        "T88x_TI_BUS_WRITE",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T88x_TI_UTLB_HIT",
+        "T88x_TI_UTLB_NEW_MISS",
+        "T88x_TI_UTLB_REPLAY_FULL",
+        "T88x_TI_UTLB_REPLAY_MISS",
+        "T88x_TI_UTLB_STALL",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "T88x_FRAG_ACTIVE",
+        "T88x_FRAG_PRIMITIVES",
+        "T88x_FRAG_PRIMITIVES_DROPPED",
+        "T88x_FRAG_CYCLES_DESC",
+        "T88x_FRAG_CYCLES_ZS_REORDER_ACTIVE",
+        "T88x_FRAG_CYCLES_VERT",
+        "T88x_FRAG_CYCLES_TRISETUP",
+        "T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+        "T88x_FRAG_THREADS",
+        "T88x_FRAG_DUMMY_THREADS",
+        "T88x_FRAG_QUADS_RAST",
+        "T88x_FRAG_QUADS_EZS_TEST",
+        "T88x_FRAG_QUADS_EZS_KILLED",
+        "T88x_FRAG_THREADS_LZS_TEST",
+        "T88x_FRAG_THREADS_LZS_KILLED",
+        "T88x_FRAG_CYCLES_NO_TILE",
+        "T88x_FRAG_NUM_TILES",
+        "T88x_FRAG_TRANS_ELIM",
+        "T88x_COMPUTE_ACTIVE",
+        "T88x_COMPUTE_TASKS",
+        "T88x_COMPUTE_THREADS",
+        "T88x_COMPUTE_CYCLES_DESC",
+        "T88x_TRIPIPE_ACTIVE",
+        "T88x_ARITH_WORDS",
+        "T88x_ARITH_CYCLES_REG",
+        "T88x_ARITH_CYCLES_L0",
+        "T88x_ARITH_FRAG_DEPEND",
+        "T88x_LS_WORDS",
+        "T88x_LS_ISSUES",
+        "T88x_LS_REISSUE_ATTR",
+        "T88x_LS_REISSUES_VARY",
+        "T88x_LS_VARY_RV_MISS",
+        "T88x_LS_VARY_RV_HIT",
+        "T88x_LS_NO_UNPARK",
+        "T88x_TEX_WORDS",
+        "T88x_TEX_BUBBLES",
+        "T88x_TEX_WORDS_L0",
+        "T88x_TEX_WORDS_DESC",
+        "T88x_TEX_ISSUES",
+        "T88x_TEX_RECIRC_FMISS",
+        "T88x_TEX_RECIRC_DESC",
+        "T88x_TEX_RECIRC_MULTI",
+        "T88x_TEX_RECIRC_PMISS",
+        "T88x_TEX_RECIRC_CONF",
+        "T88x_LSC_READ_HITS",
+        "T88x_LSC_READ_OP",
+        "T88x_LSC_WRITE_HITS",
+        "T88x_LSC_WRITE_OP",
+        "T88x_LSC_ATOMIC_HITS",
+        "T88x_LSC_ATOMIC_OP",
+        "T88x_LSC_LINE_FETCHES",
+        "T88x_LSC_DIRTY_LINE",
+        "T88x_LSC_SNOOPS",
+        "T88x_AXI_TLB_STALL",
+        "T88x_AXI_TLB_MISS",
+        "T88x_AXI_TLB_TRANSACTION",
+        "T88x_LS_TLB_MISS",
+        "T88x_LS_TLB_HIT",
+        "T88x_AXI_BEATS_READ",
+        "T88x_AXI_BEATS_WRITTEN",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "T88x_MMU_HIT",
+        "T88x_MMU_NEW_MISS",
+        "T88x_MMU_REPLAY_FULL",
+        "T88x_MMU_REPLAY_MISS",
+        "T88x_MMU_TABLE_WALK",
+        "T88x_MMU_REQUESTS",
+        "",
+        "",
+        "T88x_UTLB_HIT",
+        "T88x_UTLB_NEW_MISS",
+        "T88x_UTLB_REPLAY_FULL",
+        "T88x_UTLB_REPLAY_MISS",
+        "T88x_UTLB_STALL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "T88x_L2_EXT_WRITE_BEATS",
+        "T88x_L2_EXT_READ_BEATS",
+        "T88x_L2_ANY_LOOKUP",
+        "T88x_L2_READ_LOOKUP",
+        "T88x_L2_SREAD_LOOKUP",
+        "T88x_L2_READ_REPLAY",
+        "T88x_L2_READ_SNOOP",
+        "T88x_L2_READ_HIT",
+        "T88x_L2_CLEAN_MISS",
+        "T88x_L2_WRITE_LOOKUP",
+        "T88x_L2_SWRITE_LOOKUP",
+        "T88x_L2_WRITE_REPLAY",
+        "T88x_L2_WRITE_SNOOP",
+        "T88x_L2_WRITE_HIT",
+        "T88x_L2_EXT_READ_FULL",
+        "",
+        "T88x_L2_EXT_WRITE_FULL",
+        "T88x_L2_EXT_R_W_HAZARD",
+        "T88x_L2_EXT_READ",
+        "T88x_L2_EXT_READ_LINE",
+        "T88x_L2_EXT_WRITE",
+        "T88x_L2_EXT_WRITE_LINE",
+        "T88x_L2_EXT_WRITE_SMALL",
+        "T88x_L2_EXT_BARRIER",
+        "T88x_L2_EXT_AR_STALL",
+        "T88x_L2_EXT_R_BUF_FULL",
+        "T88x_L2_EXT_RD_BUF_FULL",
+        "T88x_L2_EXT_R_RAW",
+        "T88x_L2_EXT_W_STALL",
+        "T88x_L2_EXT_W_BUF_FULL",
+        "T88x_L2_EXT_R_BUF_FULL",
+        "T88x_L2_TAG_HAZARD",
+        "T88x_L2_SNOOP_FULL",
+        "T88x_L2_REPLAY_FULL"};
+
+    static const char * const hardware_counters_mali_tHEx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "THEx_MESSAGES_SENT",
+        "THEx_MESSAGES_RECEIVED",
+        "THEx_GPU_ACTIVE",
+        "THEx_IRQ_ACTIVE",
+        "THEx_JS0_JOBS",
+        "THEx_JS0_TASKS",
+        "THEx_JS0_ACTIVE",
+        "",
+        "THEx_JS0_WAIT_READ",
+        "THEx_JS0_WAIT_ISSUE",
+        "THEx_JS0_WAIT_DEPEND",
+        "THEx_JS0_WAIT_FINISH",
+        "THEx_JS1_JOBS",
+        "THEx_JS1_TASKS",
+        "THEx_JS1_ACTIVE",
+        "",
+        "THEx_JS1_WAIT_READ",
+        "THEx_JS1_WAIT_ISSUE",
+        "THEx_JS1_WAIT_DEPEND",
+        "THEx_JS1_WAIT_FINISH",
+        "THEx_JS2_JOBS",
+        "THEx_JS2_TASKS",
+        "THEx_JS2_ACTIVE",
+        "",
+        "THEx_JS2_WAIT_READ",
+        "THEx_JS2_WAIT_ISSUE",
+        "THEx_JS2_WAIT_DEPEND",
+        "THEx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "THEx_TILER_ACTIVE",
+        "THEx_JOBS_PROCESSED",
+        "THEx_TRIANGLES",
+        "THEx_LINES",
+        "THEx_POINTS",
+        "THEx_FRONT_FACING",
+        "THEx_BACK_FACING",
+        "THEx_PRIM_VISIBLE",
+        "THEx_PRIM_CULLED",
+        "THEx_PRIM_CLIPPED",
+        "THEx_PRIM_SAT_CULLED",
+        "",
+        "",
+        "THEx_BUS_READ",
+        "",
+        "THEx_BUS_WRITE",
+        "THEx_LOADING_DESC",
+        "THEx_IDVS_POS_SHAD_REQ",
+        "THEx_IDVS_POS_SHAD_WAIT",
+        "THEx_IDVS_POS_SHAD_STALL",
+        "THEx_IDVS_POS_FIFO_FULL",
+        "THEx_PREFETCH_STALL",
+        "THEx_VCACHE_HIT",
+        "THEx_VCACHE_MISS",
+        "THEx_VCACHE_LINE_WAIT",
+        "THEx_VFETCH_POS_READ_WAIT",
+        "THEx_VFETCH_VERTEX_WAIT",
+        "THEx_VFETCH_STALL",
+        "THEx_PRIMASSY_STALL",
+        "THEx_BBOX_GEN_STALL",
+        "THEx_IDVS_VBU_HIT",
+        "THEx_IDVS_VBU_MISS",
+        "THEx_IDVS_VBU_LINE_DEALLOCATE",
+        "THEx_IDVS_VAR_SHAD_REQ",
+        "THEx_IDVS_VAR_SHAD_STALL",
+        "THEx_BINNER_STALL",
+        "THEx_ITER_STALL",
+        "THEx_COMPRESS_MISS",
+        "THEx_COMPRESS_STALL",
+        "THEx_PCACHE_HIT",
+        "THEx_PCACHE_MISS",
+        "THEx_PCACHE_MISS_STALL",
+        "THEx_PCACHE_EVICT_STALL",
+        "THEx_PMGR_PTR_WR_STALL",
+        "THEx_PMGR_PTR_RD_STALL",
+        "THEx_PMGR_CMD_WR_STALL",
+        "THEx_WRBUF_ACTIVE",
+        "THEx_WRBUF_HIT",
+        "THEx_WRBUF_MISS",
+        "THEx_WRBUF_NO_FREE_LINE_STALL",
+        "THEx_WRBUF_NO_AXI_ID_STALL",
+        "THEx_WRBUF_AXI_STALL",
+        "",
+        "",
+        "",
+        "THEx_UTLB_TRANS",
+        "THEx_UTLB_TRANS_HIT",
+        "THEx_UTLB_TRANS_STALL",
+        "THEx_UTLB_TRANS_MISS_DELAY",
+        "THEx_UTLB_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "THEx_FRAG_ACTIVE",
+        "THEx_FRAG_PRIMITIVES",
+        "THEx_FRAG_PRIM_RAST",
+        "THEx_FRAG_FPK_ACTIVE",
+        "THEx_FRAG_STARVING",
+        "THEx_FRAG_WARPS",
+        "THEx_FRAG_PARTIAL_WARPS",
+        "THEx_FRAG_QUADS_RAST",
+        "THEx_FRAG_QUADS_EZS_TEST",
+        "THEx_FRAG_QUADS_EZS_UPDATE",
+        "THEx_FRAG_QUADS_EZS_KILL",
+        "THEx_FRAG_LZS_TEST",
+        "THEx_FRAG_LZS_KILL",
+        "",
+        "THEx_FRAG_PTILES",
+        "THEx_FRAG_TRANS_ELIM",
+        "THEx_QUAD_FPK_KILLER",
+        "",
+        "THEx_COMPUTE_ACTIVE",
+        "THEx_COMPUTE_TASKS",
+        "THEx_COMPUTE_WARPS",
+        "THEx_COMPUTE_STARVING",
+        "THEx_EXEC_CORE_ACTIVE",
+        "THEx_EXEC_ACTIVE",
+        "THEx_EXEC_INSTR_COUNT",
+        "THEx_EXEC_INSTR_DIVERGED",
+        "THEx_EXEC_INSTR_STARVING",
+        "THEx_ARITH_INSTR_SINGLE_FMA",
+        "THEx_ARITH_INSTR_DOUBLE",
+        "THEx_ARITH_INSTR_MSG",
+        "THEx_ARITH_INSTR_MSG_ONLY",
+        "THEx_TEX_INSTR",
+        "THEx_TEX_INSTR_MIPMAP",
+        "THEx_TEX_INSTR_COMPRESSED",
+        "THEx_TEX_INSTR_3D",
+        "THEx_TEX_INSTR_TRILINEAR",
+        "THEx_TEX_COORD_ISSUE",
+        "THEx_TEX_COORD_STALL",
+        "THEx_TEX_STARVE_CACHE",
+        "THEx_TEX_STARVE_FILTER",
+        "THEx_LS_MEM_READ_FULL",
+        "THEx_LS_MEM_READ_SHORT",
+        "THEx_LS_MEM_WRITE_FULL",
+        "THEx_LS_MEM_WRITE_SHORT",
+        "THEx_LS_MEM_ATOMIC",
+        "THEx_VARY_INSTR",
+        "THEx_VARY_SLOT_32",
+        "THEx_VARY_SLOT_16",
+        "THEx_ATTR_INSTR",
+        "THEx_ARITH_INSTR_FP_MUL",
+        "THEx_BEATS_RD_FTC",
+        "THEx_BEATS_RD_FTC_EXT",
+        "THEx_BEATS_RD_LSC",
+        "THEx_BEATS_RD_LSC_EXT",
+        "THEx_BEATS_RD_TEX",
+        "THEx_BEATS_RD_TEX_EXT",
+        "THEx_BEATS_RD_OTHER",
+        "THEx_BEATS_WR_LSC",
+        "THEx_BEATS_WR_TIB",
+        "",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "THEx_MMU_REQUESTS",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "THEx_L2_RD_MSG_IN",
+        "THEx_L2_RD_MSG_IN_STALL",
+        "THEx_L2_WR_MSG_IN",
+        "THEx_L2_WR_MSG_IN_STALL",
+        "THEx_L2_SNP_MSG_IN",
+        "THEx_L2_SNP_MSG_IN_STALL",
+        "THEx_L2_RD_MSG_OUT",
+        "THEx_L2_RD_MSG_OUT_STALL",
+        "THEx_L2_WR_MSG_OUT",
+        "THEx_L2_ANY_LOOKUP",
+        "THEx_L2_READ_LOOKUP",
+        "THEx_L2_WRITE_LOOKUP",
+        "THEx_L2_EXT_SNOOP_LOOKUP",
+        "THEx_L2_EXT_READ",
+        "THEx_L2_EXT_READ_NOSNP",
+        "THEx_L2_EXT_READ_UNIQUE",
+        "THEx_L2_EXT_READ_BEATS",
+        "THEx_L2_EXT_AR_STALL",
+        "THEx_L2_EXT_AR_CNT_Q1",
+        "THEx_L2_EXT_AR_CNT_Q2",
+        "THEx_L2_EXT_AR_CNT_Q3",
+        "THEx_L2_EXT_RRESP_0_127",
+        "THEx_L2_EXT_RRESP_128_191",
+        "THEx_L2_EXT_RRESP_192_255",
+        "THEx_L2_EXT_RRESP_256_319",
+        "THEx_L2_EXT_RRESP_320_383",
+        "THEx_L2_EXT_WRITE",
+        "THEx_L2_EXT_WRITE_NOSNP_FULL",
+        "THEx_L2_EXT_WRITE_NOSNP_PTL",
+        "THEx_L2_EXT_WRITE_SNP_FULL",
+        "THEx_L2_EXT_WRITE_SNP_PTL",
+        "THEx_L2_EXT_WRITE_BEATS",
+        "THEx_L2_EXT_W_STALL",
+        "THEx_L2_EXT_AW_CNT_Q1",
+        "THEx_L2_EXT_AW_CNT_Q2",
+        "THEx_L2_EXT_AW_CNT_Q3",
+        "THEx_L2_EXT_SNOOP",
+        "THEx_L2_EXT_SNOOP_STALL",
+        "THEx_L2_EXT_SNOOP_RESP_CLEAN",
+        "THEx_L2_EXT_SNOOP_RESP_DATA",
+        "THEx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tMIx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TMIx_MESSAGES_SENT",
+        "TMIx_MESSAGES_RECEIVED",
+        "TMIx_GPU_ACTIVE",
+        "TMIx_IRQ_ACTIVE",
+        "TMIx_JS0_JOBS",
+        "TMIx_JS0_TASKS",
+        "TMIx_JS0_ACTIVE",
+        "",
+        "TMIx_JS0_WAIT_READ",
+        "TMIx_JS0_WAIT_ISSUE",
+        "TMIx_JS0_WAIT_DEPEND",
+        "TMIx_JS0_WAIT_FINISH",
+        "TMIx_JS1_JOBS",
+        "TMIx_JS1_TASKS",
+        "TMIx_JS1_ACTIVE",
+        "",
+        "TMIx_JS1_WAIT_READ",
+        "TMIx_JS1_WAIT_ISSUE",
+        "TMIx_JS1_WAIT_DEPEND",
+        "TMIx_JS1_WAIT_FINISH",
+        "TMIx_JS2_JOBS",
+        "TMIx_JS2_TASKS",
+        "TMIx_JS2_ACTIVE",
+        "",
+        "TMIx_JS2_WAIT_READ",
+        "TMIx_JS2_WAIT_ISSUE",
+        "TMIx_JS2_WAIT_DEPEND",
+        "TMIx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TMIx_TILER_ACTIVE",
+        "TMIx_JOBS_PROCESSED",
+        "TMIx_TRIANGLES",
+        "TMIx_LINES",
+        "TMIx_POINTS",
+        "TMIx_FRONT_FACING",
+        "TMIx_BACK_FACING",
+        "TMIx_PRIM_VISIBLE",
+        "TMIx_PRIM_CULLED",
+        "TMIx_PRIM_CLIPPED",
+        "TMIx_PRIM_SAT_CULLED",
+        "TMIx_BIN_ALLOC_INIT",
+        "TMIx_BIN_ALLOC_OVERFLOW",
+        "TMIx_BUS_READ",
+        "",
+        "TMIx_BUS_WRITE",
+        "TMIx_LOADING_DESC",
+        "TMIx_IDVS_POS_SHAD_REQ",
+        "TMIx_IDVS_POS_SHAD_WAIT",
+        "TMIx_IDVS_POS_SHAD_STALL",
+        "TMIx_IDVS_POS_FIFO_FULL",
+        "TMIx_PREFETCH_STALL",
+        "TMIx_VCACHE_HIT",
+        "TMIx_VCACHE_MISS",
+        "TMIx_VCACHE_LINE_WAIT",
+        "TMIx_VFETCH_POS_READ_WAIT",
+        "TMIx_VFETCH_VERTEX_WAIT",
+        "TMIx_VFETCH_STALL",
+        "TMIx_PRIMASSY_STALL",
+        "TMIx_BBOX_GEN_STALL",
+        "TMIx_IDVS_VBU_HIT",
+        "TMIx_IDVS_VBU_MISS",
+        "TMIx_IDVS_VBU_LINE_DEALLOCATE",
+        "TMIx_IDVS_VAR_SHAD_REQ",
+        "TMIx_IDVS_VAR_SHAD_STALL",
+        "TMIx_BINNER_STALL",
+        "TMIx_ITER_STALL",
+        "TMIx_COMPRESS_MISS",
+        "TMIx_COMPRESS_STALL",
+        "TMIx_PCACHE_HIT",
+        "TMIx_PCACHE_MISS",
+        "TMIx_PCACHE_MISS_STALL",
+        "TMIx_PCACHE_EVICT_STALL",
+        "TMIx_PMGR_PTR_WR_STALL",
+        "TMIx_PMGR_PTR_RD_STALL",
+        "TMIx_PMGR_CMD_WR_STALL",
+        "TMIx_WRBUF_ACTIVE",
+        "TMIx_WRBUF_HIT",
+        "TMIx_WRBUF_MISS",
+        "TMIx_WRBUF_NO_FREE_LINE_STALL",
+        "TMIx_WRBUF_NO_AXI_ID_STALL",
+        "TMIx_WRBUF_AXI_STALL",
+        "",
+        "",
+        "",
+        "TMIx_UTLB_TRANS",
+        "TMIx_UTLB_TRANS_HIT",
+        "TMIx_UTLB_TRANS_STALL",
+        "TMIx_UTLB_TRANS_MISS_DELAY",
+        "TMIx_UTLB_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TMIx_FRAG_ACTIVE",
+        "TMIx_FRAG_PRIMITIVES",
+        "TMIx_FRAG_PRIM_RAST",
+        "TMIx_FRAG_FPK_ACTIVE",
+        "TMIx_FRAG_STARVING",
+        "TMIx_FRAG_WARPS",
+        "TMIx_FRAG_PARTIAL_WARPS",
+        "TMIx_FRAG_QUADS_RAST",
+        "TMIx_FRAG_QUADS_EZS_TEST",
+        "TMIx_FRAG_QUADS_EZS_UPDATE",
+        "TMIx_FRAG_QUADS_EZS_KILL",
+        "TMIx_FRAG_LZS_TEST",
+        "TMIx_FRAG_LZS_KILL",
+        "",
+        "TMIx_FRAG_PTILES",
+        "TMIx_FRAG_TRANS_ELIM",
+        "TMIx_QUAD_FPK_KILLER",
+        "",
+        "TMIx_COMPUTE_ACTIVE",
+        "TMIx_COMPUTE_TASKS",
+        "TMIx_COMPUTE_WARPS",
+        "TMIx_COMPUTE_STARVING",
+        "TMIx_EXEC_CORE_ACTIVE",
+        "TMIx_EXEC_ACTIVE",
+        "TMIx_EXEC_INSTR_COUNT",
+        "TMIx_EXEC_INSTR_DIVERGED",
+        "TMIx_EXEC_INSTR_STARVING",
+        "TMIx_ARITH_INSTR_SINGLE_FMA",
+        "TMIx_ARITH_INSTR_DOUBLE",
+        "TMIx_ARITH_INSTR_MSG",
+        "TMIx_ARITH_INSTR_MSG_ONLY",
+        "TMIx_TEX_INSTR",
+        "TMIx_TEX_INSTR_MIPMAP",
+        "TMIx_TEX_INSTR_COMPRESSED",
+        "TMIx_TEX_INSTR_3D",
+        "TMIx_TEX_INSTR_TRILINEAR",
+        "TMIx_TEX_COORD_ISSUE",
+        "TMIx_TEX_COORD_STALL",
+        "TMIx_TEX_STARVE_CACHE",
+        "TMIx_TEX_STARVE_FILTER",
+        "TMIx_LS_MEM_READ_FULL",
+        "TMIx_LS_MEM_READ_SHORT",
+        "TMIx_LS_MEM_WRITE_FULL",
+        "TMIx_LS_MEM_WRITE_SHORT",
+        "TMIx_LS_MEM_ATOMIC",
+        "TMIx_VARY_INSTR",
+        "TMIx_VARY_SLOT_32",
+        "TMIx_VARY_SLOT_16",
+        "TMIx_ATTR_INSTR",
+        "TMIx_ARITH_INSTR_FP_MUL",
+        "TMIx_BEATS_RD_FTC",
+        "TMIx_BEATS_RD_FTC_EXT",
+        "TMIx_BEATS_RD_LSC",
+        "TMIx_BEATS_RD_LSC_EXT",
+        "TMIx_BEATS_RD_TEX",
+        "TMIx_BEATS_RD_TEX_EXT",
+        "TMIx_BEATS_RD_OTHER",
+        "TMIx_BEATS_WR_LSC",
+        "TMIx_BEATS_WR_TIB",
+        "",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TMIx_MMU_REQUESTS",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "TMIx_L2_RD_MSG_IN",
+        "TMIx_L2_RD_MSG_IN_STALL",
+        "TMIx_L2_WR_MSG_IN",
+        "TMIx_L2_WR_MSG_IN_STALL",
+        "TMIx_L2_SNP_MSG_IN",
+        "TMIx_L2_SNP_MSG_IN_STALL",
+        "TMIx_L2_RD_MSG_OUT",
+        "TMIx_L2_RD_MSG_OUT_STALL",
+        "TMIx_L2_WR_MSG_OUT",
+        "TMIx_L2_ANY_LOOKUP",
+        "TMIx_L2_READ_LOOKUP",
+        "TMIx_L2_WRITE_LOOKUP",
+        "TMIx_L2_EXT_SNOOP_LOOKUP",
+        "TMIx_L2_EXT_READ",
+        "TMIx_L2_EXT_READ_NOSNP",
+        "TMIx_L2_EXT_READ_UNIQUE",
+        "TMIx_L2_EXT_READ_BEATS",
+        "TMIx_L2_EXT_AR_STALL",
+        "TMIx_L2_EXT_AR_CNT_Q1",
+        "TMIx_L2_EXT_AR_CNT_Q2",
+        "TMIx_L2_EXT_AR_CNT_Q3",
+        "TMIx_L2_EXT_RRESP_0_127",
+        "TMIx_L2_EXT_RRESP_128_191",
+        "TMIx_L2_EXT_RRESP_192_255",
+        "TMIx_L2_EXT_RRESP_256_319",
+        "TMIx_L2_EXT_RRESP_320_383",
+        "TMIx_L2_EXT_WRITE",
+        "TMIx_L2_EXT_WRITE_NOSNP_FULL",
+        "TMIx_L2_EXT_WRITE_NOSNP_PTL",
+        "TMIx_L2_EXT_WRITE_SNP_FULL",
+        "TMIx_L2_EXT_WRITE_SNP_PTL",
+        "TMIx_L2_EXT_WRITE_BEATS",
+        "TMIx_L2_EXT_W_STALL",
+        "TMIx_L2_EXT_AW_CNT_Q1",
+        "TMIx_L2_EXT_AW_CNT_Q2",
+        "TMIx_L2_EXT_AW_CNT_Q3",
+        "TMIx_L2_EXT_SNOOP",
+        "TMIx_L2_EXT_SNOOP_STALL",
+        "TMIx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TMIx_L2_EXT_SNOOP_RESP_DATA",
+        "TMIx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tDVx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TDVx_MESSAGES_SENT",
+        "TDVx_MESSAGES_RECEIVED",
+        "TDVx_GPU_ACTIVE",
+        "TDVx_IRQ_ACTIVE",
+        "TDVx_JS0_JOBS",
+        "TDVx_JS0_TASKS",
+        "TDVx_JS0_ACTIVE",
+        "TDVx_JS0_WAIT_FLUSH",
+        "TDVx_JS0_WAIT_READ",
+        "TDVx_JS0_WAIT_ISSUE",
+        "TDVx_JS0_WAIT_DEPEND",
+        "TDVx_JS0_WAIT_FINISH",
+        "TDVx_JS1_JOBS",
+        "TDVx_JS1_TASKS",
+        "TDVx_JS1_ACTIVE",
+        "TDVx_JS1_WAIT_FLUSH",
+        "TDVx_JS1_WAIT_READ",
+        "TDVx_JS1_WAIT_ISSUE",
+        "TDVx_JS1_WAIT_DEPEND",
+        "TDVx_JS1_WAIT_FINISH",
+        "TDVx_JS2_JOBS",
+        "TDVx_JS2_TASKS",
+        "TDVx_JS2_ACTIVE",
+        "TDVx_JS2_WAIT_FLUSH",
+        "TDVx_JS2_WAIT_READ",
+        "TDVx_JS2_WAIT_ISSUE",
+        "TDVx_JS2_WAIT_DEPEND",
+        "TDVx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "TDVx_CACHE_FLUSH",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TDVx_TILER_ACTIVE",
+        "TDVx_JOBS_PROCESSED",
+        "TDVx_TRIANGLES",
+        "TDVx_LINES",
+        "TDVx_POINTS",
+        "TDVx_FRONT_FACING",
+        "TDVx_BACK_FACING",
+        "TDVx_PRIM_VISIBLE",
+        "TDVx_PRIM_CULLED",
+        "TDVx_PRIM_CLIPPED",
+        "TDVx_PRIM_SAT_CULLED",
+        "TDVx_BIN_ALLOC_INIT",
+        "TDVx_BIN_ALLOC_OVERFLOW",
+        "TDVx_BUS_READ",
+        "",
+        "TDVx_BUS_WRITE",
+        "TDVx_LOADING_DESC",
+        "TDVx_IDVS_POS_SHAD_REQ",
+        "TDVx_IDVS_POS_SHAD_WAIT",
+        "TDVx_IDVS_POS_SHAD_STALL",
+        "TDVx_IDVS_POS_FIFO_FULL",
+        "TDVx_PREFETCH_STALL",
+        "TDVx_VCACHE_HIT",
+        "TDVx_VCACHE_MISS",
+        "TDVx_VCACHE_LINE_WAIT",
+        "TDVx_VFETCH_POS_READ_WAIT",
+        "TDVx_VFETCH_VERTEX_WAIT",
+        "TDVx_VFETCH_STALL",
+        "TDVx_PRIMASSY_STALL",
+        "TDVx_BBOX_GEN_STALL",
+        "TDVx_IDVS_VBU_HIT",
+        "TDVx_IDVS_VBU_MISS",
+        "TDVx_IDVS_VBU_LINE_DEALLOCATE",
+        "TDVx_IDVS_VAR_SHAD_REQ",
+        "TDVx_IDVS_VAR_SHAD_STALL",
+        "TDVx_BINNER_STALL",
+        "TDVx_ITER_STALL",
+        "TDVx_COMPRESS_MISS",
+        "TDVx_COMPRESS_STALL",
+        "TDVx_PCACHE_HIT",
+        "TDVx_PCACHE_MISS",
+        "TDVx_PCACHE_MISS_STALL",
+        "TDVx_PCACHE_EVICT_STALL",
+        "TDVx_PMGR_PTR_WR_STALL",
+        "TDVx_PMGR_PTR_RD_STALL",
+        "TDVx_PMGR_CMD_WR_STALL",
+        "TDVx_WRBUF_ACTIVE",
+        "TDVx_WRBUF_HIT",
+        "TDVx_WRBUF_MISS",
+        "TDVx_WRBUF_NO_FREE_LINE_STALL",
+        "TDVx_WRBUF_NO_AXI_ID_STALL",
+        "TDVx_WRBUF_AXI_STALL",
+        "",
+        "",
+        "",
+        "TDVx_UTLB_TRANS",
+        "TDVx_UTLB_TRANS_HIT",
+        "TDVx_UTLB_TRANS_STALL",
+        "TDVx_UTLB_TRANS_MISS_DELAY",
+        "TDVx_UTLB_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TDVx_FRAG_ACTIVE",
+        "TDVx_FRAG_PRIMITIVES",
+        "TDVx_FRAG_PRIM_RAST",
+        "TDVx_FRAG_FPK_ACTIVE",
+        "TDVx_FRAG_STARVING",
+        "TDVx_FRAG_WARPS",
+        "TDVx_FRAG_PARTIAL_WARPS",
+        "TDVx_FRAG_QUADS_RAST",
+        "TDVx_FRAG_QUADS_EZS_TEST",
+        "TDVx_FRAG_QUADS_EZS_UPDATE",
+        "TDVx_FRAG_QUADS_EZS_KILL",
+        "TDVx_FRAG_LZS_TEST",
+        "TDVx_FRAG_LZS_KILL",
+        "",
+        "TDVx_FRAG_PTILES",
+        "TDVx_FRAG_TRANS_ELIM",
+        "TDVx_QUAD_FPK_KILLER",
+        "",
+        "TDVx_COMPUTE_ACTIVE",
+        "TDVx_COMPUTE_TASKS",
+        "TDVx_COMPUTE_WARPS",
+        "TDVx_COMPUTE_STARVING",
+        "TDVx_EXEC_CORE_ACTIVE",
+        "TDVx_EXEC_ACTIVE",
+        "TDVx_EXEC_INSTR_COUNT",
+        "TDVx_EXEC_INSTR_DIVERGED",
+        "TDVx_EXEC_INSTR_STARVING",
+        "TDVx_ARITH_INSTR_SINGLE_FMA",
+        "TDVx_ARITH_INSTR_DOUBLE",
+        "TDVx_ARITH_INSTR_MSG",
+        "TDVx_ARITH_INSTR_MSG_ONLY",
+        "TDVx_TEX_MSGI_NUM_QUADS",
+        "TDVx_TEX_DFCH_NUM_PASSES",
+        "TDVx_TEX_DFCH_NUM_PASSES_MISS",
+        "TDVx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+        "TDVx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+        "TDVx_TEX_TFCH_NUM_LINES_FETCHED",
+        "TDVx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED",
+        "TDVx_TEX_TFCH_NUM_OPERATIONS",
+        "TDVx_TEX_FILT_NUM_OPERATIONS",
+        "TDVx_LS_MEM_READ_FULL",
+        "TDVx_LS_MEM_READ_SHORT",
+        "TDVx_LS_MEM_WRITE_FULL",
+        "TDVx_LS_MEM_WRITE_SHORT",
+        "TDVx_LS_MEM_ATOMIC",
+        "TDVx_VARY_INSTR",
+        "TDVx_VARY_SLOT_32",
+        "TDVx_VARY_SLOT_16",
+        "TDVx_ATTR_INSTR",
+        "TDVx_ARITH_INSTR_FP_MUL",
+        "TDVx_BEATS_RD_FTC",
+        "TDVx_BEATS_RD_FTC_EXT",
+        "TDVx_BEATS_RD_LSC",
+        "TDVx_BEATS_RD_LSC_EXT",
+        "TDVx_BEATS_RD_TEX",
+        "TDVx_BEATS_RD_TEX_EXT",
+        "TDVx_BEATS_RD_OTHER",
+        "TDVx_BEATS_WR_LSC_OTHER",
+        "TDVx_BEATS_WR_TIB",
+        "TDVx_BEATS_WR_LSC_WB",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TDVx_MMU_REQUESTS",
+        "TDVx_MMU_TABLE_READS_L3",
+        "TDVx_MMU_TABLE_READS_L2",
+        "TDVx_MMU_HIT_L3",
+        "TDVx_MMU_HIT_L2",
+        "TDVx_MMU_S2_REQUESTS",
+        "TDVx_MMU_S2_TABLE_READS_L3",
+        "TDVx_MMU_S2_TABLE_READS_L2",
+        "TDVx_MMU_S2_HIT_L3",
+        "TDVx_MMU_S2_HIT_L2",
+        "",
+        "",
+        "TDVx_L2_RD_MSG_IN",
+        "TDVx_L2_RD_MSG_IN_STALL",
+        "TDVx_L2_WR_MSG_IN",
+        "TDVx_L2_WR_MSG_IN_STALL",
+        "TDVx_L2_SNP_MSG_IN",
+        "TDVx_L2_SNP_MSG_IN_STALL",
+        "TDVx_L2_RD_MSG_OUT",
+        "TDVx_L2_RD_MSG_OUT_STALL",
+        "TDVx_L2_WR_MSG_OUT",
+        "TDVx_L2_ANY_LOOKUP",
+        "TDVx_L2_READ_LOOKUP",
+        "TDVx_L2_WRITE_LOOKUP",
+        "TDVx_L2_EXT_SNOOP_LOOKUP",
+        "TDVx_L2_EXT_READ",
+        "TDVx_L2_EXT_READ_NOSNP",
+        "TDVx_L2_EXT_READ_UNIQUE",
+        "TDVx_L2_EXT_READ_BEATS",
+        "TDVx_L2_EXT_AR_STALL",
+        "TDVx_L2_EXT_AR_CNT_Q1",
+        "TDVx_L2_EXT_AR_CNT_Q2",
+        "TDVx_L2_EXT_AR_CNT_Q3",
+        "TDVx_L2_EXT_RRESP_0_127",
+        "TDVx_L2_EXT_RRESP_128_191",
+        "TDVx_L2_EXT_RRESP_192_255",
+        "TDVx_L2_EXT_RRESP_256_319",
+        "TDVx_L2_EXT_RRESP_320_383",
+        "TDVx_L2_EXT_WRITE",
+        "TDVx_L2_EXT_WRITE_NOSNP_FULL",
+        "TDVx_L2_EXT_WRITE_NOSNP_PTL",
+        "TDVx_L2_EXT_WRITE_SNP_FULL",
+        "TDVx_L2_EXT_WRITE_SNP_PTL",
+        "TDVx_L2_EXT_WRITE_BEATS",
+        "TDVx_L2_EXT_W_STALL",
+        "TDVx_L2_EXT_AW_CNT_Q1",
+        "TDVx_L2_EXT_AW_CNT_Q2",
+        "TDVx_L2_EXT_AW_CNT_Q3",
+        "TDVx_L2_EXT_SNOOP",
+        "TDVx_L2_EXT_SNOOP_STALL",
+        "TDVx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TDVx_L2_EXT_SNOOP_RESP_DATA",
+        "TDVx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tSIx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TSIx_MESSAGES_SENT",
+        "TSIx_MESSAGES_RECEIVED",
+        "TSIx_GPU_ACTIVE",
+        "TSIx_IRQ_ACTIVE",
+        "TSIx_JS0_JOBS",
+        "TSIx_JS0_TASKS",
+        "TSIx_JS0_ACTIVE",
+        "TSIx_JS0_WAIT_FLUSH",
+        "TSIx_JS0_WAIT_READ",
+        "TSIx_JS0_WAIT_ISSUE",
+        "TSIx_JS0_WAIT_DEPEND",
+        "TSIx_JS0_WAIT_FINISH",
+        "TSIx_JS1_JOBS",
+        "TSIx_JS1_TASKS",
+        "TSIx_JS1_ACTIVE",
+        "TSIx_JS1_WAIT_FLUSH",
+        "TSIx_JS1_WAIT_READ",
+        "TSIx_JS1_WAIT_ISSUE",
+        "TSIx_JS1_WAIT_DEPEND",
+        "TSIx_JS1_WAIT_FINISH",
+        "TSIx_JS2_JOBS",
+        "TSIx_JS2_TASKS",
+        "TSIx_JS2_ACTIVE",
+        "TSIx_JS2_WAIT_FLUSH",
+        "TSIx_JS2_WAIT_READ",
+        "TSIx_JS2_WAIT_ISSUE",
+        "TSIx_JS2_WAIT_DEPEND",
+        "TSIx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TSIx_TILER_ACTIVE",
+        "TSIx_JOBS_PROCESSED",
+        "TSIx_TRIANGLES",
+        "TSIx_LINES",
+        "TSIx_POINTS",
+        "TSIx_FRONT_FACING",
+        "TSIx_BACK_FACING",
+        "TSIx_PRIM_VISIBLE",
+        "TSIx_PRIM_CULLED",
+        "TSIx_PRIM_CLIPPED",
+        "TSIx_PRIM_SAT_CULLED",
+        "TSIx_BIN_ALLOC_INIT",
+        "TSIx_BIN_ALLOC_OVERFLOW",
+        "TSIx_BUS_READ",
+        "",
+        "TSIx_BUS_WRITE",
+        "TSIx_LOADING_DESC",
+        "TSIx_IDVS_POS_SHAD_REQ",
+        "TSIx_IDVS_POS_SHAD_WAIT",
+        "TSIx_IDVS_POS_SHAD_STALL",
+        "TSIx_IDVS_POS_FIFO_FULL",
+        "TSIx_PREFETCH_STALL",
+        "TSIx_VCACHE_HIT",
+        "TSIx_VCACHE_MISS",
+        "TSIx_VCACHE_LINE_WAIT",
+        "TSIx_VFETCH_POS_READ_WAIT",
+        "TSIx_VFETCH_VERTEX_WAIT",
+        "TSIx_VFETCH_STALL",
+        "TSIx_PRIMASSY_STALL",
+        "TSIx_BBOX_GEN_STALL",
+        "TSIx_IDVS_VBU_HIT",
+        "TSIx_IDVS_VBU_MISS",
+        "TSIx_IDVS_VBU_LINE_DEALLOCATE",
+        "TSIx_IDVS_VAR_SHAD_REQ",
+        "TSIx_IDVS_VAR_SHAD_STALL",
+        "TSIx_BINNER_STALL",
+        "TSIx_ITER_STALL",
+        "TSIx_COMPRESS_MISS",
+        "TSIx_COMPRESS_STALL",
+        "TSIx_PCACHE_HIT",
+        "TSIx_PCACHE_MISS",
+        "TSIx_PCACHE_MISS_STALL",
+        "TSIx_PCACHE_EVICT_STALL",
+        "TSIx_PMGR_PTR_WR_STALL",
+        "TSIx_PMGR_PTR_RD_STALL",
+        "TSIx_PMGR_CMD_WR_STALL",
+        "TSIx_WRBUF_ACTIVE",
+        "TSIx_WRBUF_HIT",
+        "TSIx_WRBUF_MISS",
+        "TSIx_WRBUF_NO_FREE_LINE_STALL",
+        "TSIx_WRBUF_NO_AXI_ID_STALL",
+        "TSIx_WRBUF_AXI_STALL",
+        "",
+        "",
+        "",
+        "TSIx_UTLB_TRANS",
+        "TSIx_UTLB_TRANS_HIT",
+        "TSIx_UTLB_TRANS_STALL",
+        "TSIx_UTLB_TRANS_MISS_DELAY",
+        "TSIx_UTLB_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TSIx_FRAG_ACTIVE",
+        "TSIx_FRAG_PRIMITIVES",
+        "TSIx_FRAG_PRIM_RAST",
+        "TSIx_FRAG_FPK_ACTIVE",
+        "TSIx_FRAG_STARVING",
+        "TSIx_FRAG_WARPS",
+        "TSIx_FRAG_PARTIAL_WARPS",
+        "TSIx_FRAG_QUADS_RAST",
+        "TSIx_FRAG_QUADS_EZS_TEST",
+        "TSIx_FRAG_QUADS_EZS_UPDATE",
+        "TSIx_FRAG_QUADS_EZS_KILL",
+        "TSIx_FRAG_LZS_TEST",
+        "TSIx_FRAG_LZS_KILL",
+        "",
+        "TSIx_FRAG_PTILES",
+        "TSIx_FRAG_TRANS_ELIM",
+        "TSIx_QUAD_FPK_KILLER",
+        "",
+        "TSIx_COMPUTE_ACTIVE",
+        "TSIx_COMPUTE_TASKS",
+        "TSIx_COMPUTE_WARPS",
+        "TSIx_COMPUTE_STARVING",
+        "TSIx_EXEC_CORE_ACTIVE",
+        "TSIx_EXEC_ACTIVE",
+        "TSIx_EXEC_INSTR_COUNT",
+        "TSIx_EXEC_INSTR_DIVERGED",
+        "TSIx_EXEC_INSTR_STARVING",
+        "TSIx_ARITH_INSTR_SINGLE_FMA",
+        "TSIx_ARITH_INSTR_DOUBLE",
+        "TSIx_ARITH_INSTR_MSG",
+        "TSIx_ARITH_INSTR_MSG_ONLY",
+        "TSIx_TEX_MSGI_NUM_QUADS",
+        "TSIx_TEX_DFCH_NUM_PASSES",
+        "TSIx_TEX_DFCH_NUM_PASSES_MISS",
+        "TSIx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+        "TSIx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+        "TSIx_TEX_TFCH_NUM_LINES_FETCHED",
+        "TSIx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED",
+        "TSIx_TEX_TFCH_NUM_OPERATIONS",
+        "TSIx_TEX_FILT_NUM_OPERATIONS",
+        "TSIx_LS_MEM_READ_FULL",
+        "TSIx_LS_MEM_READ_SHORT",
+        "TSIx_LS_MEM_WRITE_FULL",
+        "TSIx_LS_MEM_WRITE_SHORT",
+        "TSIx_LS_MEM_ATOMIC",
+        "TSIx_VARY_INSTR",
+        "TSIx_VARY_SLOT_32",
+        "TSIx_VARY_SLOT_16",
+        "TSIx_ATTR_INSTR",
+        "TSIx_ARITH_INSTR_FP_MUL",
+        "TSIx_BEATS_RD_FTC",
+        "TSIx_BEATS_RD_FTC_EXT",
+        "TSIx_BEATS_RD_LSC",
+        "TSIx_BEATS_RD_LSC_EXT",
+        "TSIx_BEATS_RD_TEX",
+        "TSIx_BEATS_RD_TEX_EXT",
+        "TSIx_BEATS_RD_OTHER",
+        "TSIx_BEATS_WR_LSC_OTHER",
+        "TSIx_BEATS_WR_TIB",
+        "TSIx_BEATS_WR_LSC_WB",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TSIx_MMU_REQUESTS",
+        "TSIx_MMU_TABLE_READS_L3",
+        "TSIx_MMU_TABLE_READS_L2",
+        "TSIx_MMU_HIT_L3",
+        "TSIx_MMU_HIT_L2",
+        "TSIx_MMU_S2_REQUESTS",
+        "TSIx_MMU_S2_TABLE_READS_L3",
+        "TSIx_MMU_S2_TABLE_READS_L2",
+        "TSIx_MMU_S2_HIT_L3",
+        "TSIx_MMU_S2_HIT_L2",
+        "",
+        "",
+        "TSIx_L2_RD_MSG_IN",
+        "TSIx_L2_RD_MSG_IN_STALL",
+        "TSIx_L2_WR_MSG_IN",
+        "TSIx_L2_WR_MSG_IN_STALL",
+        "TSIx_L2_SNP_MSG_IN",
+        "TSIx_L2_SNP_MSG_IN_STALL",
+        "TSIx_L2_RD_MSG_OUT",
+        "TSIx_L2_RD_MSG_OUT_STALL",
+        "TSIx_L2_WR_MSG_OUT",
+        "TSIx_L2_ANY_LOOKUP",
+        "TSIx_L2_READ_LOOKUP",
+        "TSIx_L2_WRITE_LOOKUP",
+        "TSIx_L2_EXT_SNOOP_LOOKUP",
+        "TSIx_L2_EXT_READ",
+        "TSIx_L2_EXT_READ_NOSNP",
+        "TSIx_L2_EXT_READ_UNIQUE",
+        "TSIx_L2_EXT_READ_BEATS",
+        "TSIx_L2_EXT_AR_STALL",
+        "TSIx_L2_EXT_AR_CNT_Q1",
+        "TSIx_L2_EXT_AR_CNT_Q2",
+        "TSIx_L2_EXT_AR_CNT_Q3",
+        "TSIx_L2_EXT_RRESP_0_127",
+        "TSIx_L2_EXT_RRESP_128_191",
+        "TSIx_L2_EXT_RRESP_192_255",
+        "TSIx_L2_EXT_RRESP_256_319",
+        "TSIx_L2_EXT_RRESP_320_383",
+        "TSIx_L2_EXT_WRITE",
+        "TSIx_L2_EXT_WRITE_NOSNP_FULL",
+        "TSIx_L2_EXT_WRITE_NOSNP_PTL",
+        "TSIx_L2_EXT_WRITE_SNP_FULL",
+        "TSIx_L2_EXT_WRITE_SNP_PTL",
+        "TSIx_L2_EXT_WRITE_BEATS",
+        "TSIx_L2_EXT_W_STALL",
+        "TSIx_L2_EXT_AW_CNT_Q1",
+        "TSIx_L2_EXT_AW_CNT_Q2",
+        "TSIx_L2_EXT_AW_CNT_Q3",
+        "TSIx_L2_EXT_SNOOP",
+        "TSIx_L2_EXT_SNOOP_STALL",
+        "TSIx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TSIx_L2_EXT_SNOOP_RESP_DATA",
+        "TSIx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tNOx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TNOx_MESSAGES_SENT",
+        "TNOx_MESSAGES_RECEIVED",
+        "TNOx_GPU_ACTIVE",
+        "TNOx_IRQ_ACTIVE",
+        "TNOx_JS0_JOBS",
+        "TNOx_JS0_TASKS",
+        "TNOx_JS0_ACTIVE",
+        "TNOx_JS0_WAIT_FLUSH",
+        "TNOx_JS0_WAIT_READ",
+        "TNOx_JS0_WAIT_ISSUE",
+        "TNOx_JS0_WAIT_DEPEND",
+        "TNOx_JS0_WAIT_FINISH",
+        "TNOx_JS1_JOBS",
+        "TNOx_JS1_TASKS",
+        "TNOx_JS1_ACTIVE",
+        "TNOx_JS1_WAIT_FLUSH",
+        "TNOx_JS1_WAIT_READ",
+        "TNOx_JS1_WAIT_ISSUE",
+        "TNOx_JS1_WAIT_DEPEND",
+        "TNOx_JS1_WAIT_FINISH",
+        "TNOx_JS2_JOBS",
+        "TNOx_JS2_TASKS",
+        "TNOx_JS2_ACTIVE",
+        "TNOx_JS2_WAIT_FLUSH",
+        "TNOx_JS2_WAIT_READ",
+        "TNOx_JS2_WAIT_ISSUE",
+        "TNOx_JS2_WAIT_DEPEND",
+        "TNOx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "TNOx_CACHE_FLUSH",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TNOx_TILER_ACTIVE",
+        "TNOx_JOBS_PROCESSED",
+        "TNOx_TRIANGLES",
+        "TNOx_LINES",
+        "TNOx_POINTS",
+        "TNOx_FRONT_FACING",
+        "TNOx_BACK_FACING",
+        "TNOx_PRIM_VISIBLE",
+        "TNOx_PRIM_CULLED",
+        "TNOx_PRIM_CLIPPED",
+        "TNOx_PRIM_SAT_CULLED",
+        "TNOx_BIN_ALLOC_INIT",
+        "TNOx_BIN_ALLOC_OVERFLOW",
+        "TNOx_BUS_READ",
+        "",
+        "TNOx_BUS_WRITE",
+        "TNOx_LOADING_DESC",
+        "TNOx_IDVS_POS_SHAD_REQ",
+        "TNOx_IDVS_POS_SHAD_WAIT",
+        "TNOx_IDVS_POS_SHAD_STALL",
+        "TNOx_IDVS_POS_FIFO_FULL",
+        "TNOx_PREFETCH_STALL",
+        "TNOx_VCACHE_HIT",
+        "TNOx_VCACHE_MISS",
+        "TNOx_VCACHE_LINE_WAIT",
+        "TNOx_VFETCH_POS_READ_WAIT",
+        "TNOx_VFETCH_VERTEX_WAIT",
+        "TNOx_VFETCH_STALL",
+        "TNOx_PRIMASSY_STALL",
+        "TNOx_BBOX_GEN_STALL",
+        "TNOx_IDVS_VBU_HIT",
+        "TNOx_IDVS_VBU_MISS",
+        "TNOx_IDVS_VBU_LINE_DEALLOCATE",
+        "TNOx_IDVS_VAR_SHAD_REQ",
+        "TNOx_IDVS_VAR_SHAD_STALL",
+        "TNOx_BINNER_STALL",
+        "TNOx_ITER_STALL",
+        "TNOx_COMPRESS_MISS",
+        "TNOx_COMPRESS_STALL",
+        "TNOx_PCACHE_HIT",
+        "TNOx_PCACHE_MISS",
+        "TNOx_PCACHE_MISS_STALL",
+        "TNOx_PCACHE_EVICT_STALL",
+        "TNOx_PMGR_PTR_WR_STALL",
+        "TNOx_PMGR_PTR_RD_STALL",
+        "TNOx_PMGR_CMD_WR_STALL",
+        "TNOx_WRBUF_ACTIVE",
+        "TNOx_WRBUF_HIT",
+        "TNOx_WRBUF_MISS",
+        "TNOx_WRBUF_NO_FREE_LINE_STALL",
+        "TNOx_WRBUF_NO_AXI_ID_STALL",
+        "TNOx_WRBUF_AXI_STALL",
+        "",
+        "",
+        "",
+        "TNOx_UTLB_TRANS",
+        "TNOx_UTLB_TRANS_HIT",
+        "TNOx_UTLB_TRANS_STALL",
+        "TNOx_UTLB_TRANS_MISS_DELAY",
+        "TNOx_UTLB_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TNOx_FRAG_ACTIVE",
+        "TNOx_FRAG_PRIMITIVES",
+        "TNOx_FRAG_PRIM_RAST",
+        "TNOx_FRAG_FPK_ACTIVE",
+        "TNOx_FRAG_STARVING",
+        "TNOx_FRAG_WARPS",
+        "TNOx_FRAG_PARTIAL_WARPS",
+        "TNOx_FRAG_QUADS_RAST",
+        "TNOx_FRAG_QUADS_EZS_TEST",
+        "TNOx_FRAG_QUADS_EZS_UPDATE",
+        "TNOx_FRAG_QUADS_EZS_KILL",
+        "TNOx_FRAG_LZS_TEST",
+        "TNOx_FRAG_LZS_KILL",
+        "TNOx_WARP_REG_SIZE_64",
+        "TNOx_FRAG_PTILES",
+        "TNOx_FRAG_TRANS_ELIM",
+        "TNOx_QUAD_FPK_KILLER",
+        "TNOx_FULL_QUAD_WARPS",
+        "TNOx_COMPUTE_ACTIVE",
+        "TNOx_COMPUTE_TASKS",
+        "TNOx_COMPUTE_WARPS",
+        "TNOx_COMPUTE_STARVING",
+        "TNOx_EXEC_CORE_ACTIVE",
+        "TNOx_EXEC_ACTIVE",
+        "TNOx_EXEC_INSTR_COUNT",
+        "TNOx_EXEC_INSTR_DIVERGED",
+        "TNOx_EXEC_INSTR_STARVING",
+        "TNOx_ARITH_INSTR_SINGLE_FMA",
+        "TNOx_ARITH_INSTR_DOUBLE",
+        "TNOx_ARITH_INSTR_MSG",
+        "TNOx_ARITH_INSTR_MSG_ONLY",
+        "TNOx_TEX_MSGI_NUM_QUADS",
+        "TNOx_TEX_DFCH_NUM_PASSES",
+        "TNOx_TEX_DFCH_NUM_PASSES_MISS",
+        "TNOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+        "TNOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+        "TNOx_TEX_TFCH_NUM_LINES_FETCHED",
+        "TNOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED",
+        "TNOx_TEX_TFCH_NUM_OPERATIONS",
+        "TNOx_TEX_FILT_NUM_OPERATIONS",
+        "TNOx_LS_MEM_READ_FULL",
+        "TNOx_LS_MEM_READ_SHORT",
+        "TNOx_LS_MEM_WRITE_FULL",
+        "TNOx_LS_MEM_WRITE_SHORT",
+        "TNOx_LS_MEM_ATOMIC",
+        "TNOx_VARY_INSTR",
+        "TNOx_VARY_SLOT_32",
+        "TNOx_VARY_SLOT_16",
+        "TNOx_ATTR_INSTR",
+        "TNOx_ARITH_INSTR_FP_MUL",
+        "TNOx_BEATS_RD_FTC",
+        "TNOx_BEATS_RD_FTC_EXT",
+        "TNOx_BEATS_RD_LSC",
+        "TNOx_BEATS_RD_LSC_EXT",
+        "TNOx_BEATS_RD_TEX",
+        "TNOx_BEATS_RD_TEX_EXT",
+        "TNOx_BEATS_RD_OTHER",
+        "TNOx_BEATS_WR_LSC_OTHER",
+        "TNOx_BEATS_WR_TIB",
+        "TNOx_BEATS_WR_LSC_WB",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TNOx_MMU_REQUESTS",
+        "TNOx_MMU_TABLE_READS_L3",
+        "TNOx_MMU_TABLE_READS_L2",
+        "TNOx_MMU_HIT_L3",
+        "TNOx_MMU_HIT_L2",
+        "TNOx_MMU_S2_REQUESTS",
+        "TNOx_MMU_S2_TABLE_READS_L3",
+        "TNOx_MMU_S2_TABLE_READS_L2",
+        "TNOx_MMU_S2_HIT_L3",
+        "TNOx_MMU_S2_HIT_L2",
+        "",
+        "",
+        "TNOx_L2_RD_MSG_IN",
+        "TNOx_L2_RD_MSG_IN_STALL",
+        "TNOx_L2_WR_MSG_IN",
+        "TNOx_L2_WR_MSG_IN_STALL",
+        "TNOx_L2_SNP_MSG_IN",
+        "TNOx_L2_SNP_MSG_IN_STALL",
+        "TNOx_L2_RD_MSG_OUT",
+        "TNOx_L2_RD_MSG_OUT_STALL",
+        "TNOx_L2_WR_MSG_OUT",
+        "TNOx_L2_ANY_LOOKUP",
+        "TNOx_L2_READ_LOOKUP",
+        "TNOx_L2_WRITE_LOOKUP",
+        "TNOx_L2_EXT_SNOOP_LOOKUP",
+        "TNOx_L2_EXT_READ",
+        "TNOx_L2_EXT_READ_NOSNP",
+        "TNOx_L2_EXT_READ_UNIQUE",
+        "TNOx_L2_EXT_READ_BEATS",
+        "TNOx_L2_EXT_AR_STALL",
+        "TNOx_L2_EXT_AR_CNT_Q1",
+        "TNOx_L2_EXT_AR_CNT_Q2",
+        "TNOx_L2_EXT_AR_CNT_Q3",
+        "TNOx_L2_EXT_RRESP_0_127",
+        "TNOx_L2_EXT_RRESP_128_191",
+        "TNOx_L2_EXT_RRESP_192_255",
+        "TNOx_L2_EXT_RRESP_256_319",
+        "TNOx_L2_EXT_RRESP_320_383",
+        "TNOx_L2_EXT_WRITE",
+        "TNOx_L2_EXT_WRITE_NOSNP_FULL",
+        "TNOx_L2_EXT_WRITE_NOSNP_PTL",
+        "TNOx_L2_EXT_WRITE_SNP_FULL",
+        "TNOx_L2_EXT_WRITE_SNP_PTL",
+        "TNOx_L2_EXT_WRITE_BEATS",
+        "TNOx_L2_EXT_W_STALL",
+        "TNOx_L2_EXT_AW_CNT_Q1",
+        "TNOx_L2_EXT_AW_CNT_Q2",
+        "TNOx_L2_EXT_AW_CNT_Q3",
+        "TNOx_L2_EXT_SNOOP",
+        "TNOx_L2_EXT_SNOOP_STALL",
+        "TNOx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TNOx_L2_EXT_SNOOP_RESP_DATA",
+        "TNOx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tGOx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TGOx_MESSAGES_SENT",
+        "TGOx_MESSAGES_RECEIVED",
+        "TGOx_GPU_ACTIVE",
+        "TGOx_IRQ_ACTIVE",
+        "TGOx_JS0_JOBS",
+        "TGOx_JS0_TASKS",
+        "TGOx_JS0_ACTIVE",
+        "TGOx_JS0_WAIT_FLUSH",
+        "TGOx_JS0_WAIT_READ",
+        "TGOx_JS0_WAIT_ISSUE",
+        "TGOx_JS0_WAIT_DEPEND",
+        "TGOx_JS0_WAIT_FINISH",
+        "TGOx_JS1_JOBS",
+        "TGOx_JS1_TASKS",
+        "TGOx_JS1_ACTIVE",
+        "TGOx_JS1_WAIT_FLUSH",
+        "TGOx_JS1_WAIT_READ",
+        "TGOx_JS1_WAIT_ISSUE",
+        "TGOx_JS1_WAIT_DEPEND",
+        "TGOx_JS1_WAIT_FINISH",
+        "TGOx_JS2_JOBS",
+        "TGOx_JS2_TASKS",
+        "TGOx_JS2_ACTIVE",
+        "TGOx_JS2_WAIT_FLUSH",
+        "TGOx_JS2_WAIT_READ",
+        "TGOx_JS2_WAIT_ISSUE",
+        "TGOx_JS2_WAIT_DEPEND",
+        "TGOx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "TGOx_CACHE_FLUSH",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TGOx_TILER_ACTIVE",
+        "TGOx_JOBS_PROCESSED",
+        "TGOx_TRIANGLES",
+        "TGOx_LINES",
+        "TGOx_POINTS",
+        "TGOx_FRONT_FACING",
+        "TGOx_BACK_FACING",
+        "TGOx_PRIM_VISIBLE",
+        "TGOx_PRIM_CULLED",
+        "TGOx_PRIM_CLIPPED",
+        "TGOx_PRIM_SAT_CULLED",
+        "TGOx_BIN_ALLOC_INIT",
+        "TGOx_BIN_ALLOC_OVERFLOW",
+        "TGOx_BUS_READ",
+        "",
+        "TGOx_BUS_WRITE",
+        "TGOx_LOADING_DESC",
+        "TGOx_IDVS_POS_SHAD_REQ",
+        "TGOx_IDVS_POS_SHAD_WAIT",
+        "TGOx_IDVS_POS_SHAD_STALL",
+        "TGOx_IDVS_POS_FIFO_FULL",
+        "TGOx_PREFETCH_STALL",
+        "TGOx_VCACHE_HIT",
+        "TGOx_VCACHE_MISS",
+        "TGOx_VCACHE_LINE_WAIT",
+        "TGOx_VFETCH_POS_READ_WAIT",
+        "TGOx_VFETCH_VERTEX_WAIT",
+        "TGOx_VFETCH_STALL",
+        "TGOx_PRIMASSY_STALL",
+        "TGOx_BBOX_GEN_STALL",
+        "TGOx_IDVS_VBU_HIT",
+        "TGOx_IDVS_VBU_MISS",
+        "TGOx_IDVS_VBU_LINE_DEALLOCATE",
+        "TGOx_IDVS_VAR_SHAD_REQ",
+        "TGOx_IDVS_VAR_SHAD_STALL",
+        "TGOx_BINNER_STALL",
+        "TGOx_ITER_STALL",
+        "TGOx_COMPRESS_MISS",
+        "TGOx_COMPRESS_STALL",
+        "TGOx_PCACHE_HIT",
+        "TGOx_PCACHE_MISS",
+        "TGOx_PCACHE_MISS_STALL",
+        "TGOx_PCACHE_EVICT_STALL",
+        "TGOx_PMGR_PTR_WR_STALL",
+        "TGOx_PMGR_PTR_RD_STALL",
+        "TGOx_PMGR_CMD_WR_STALL",
+        "TGOx_WRBUF_ACTIVE",
+        "TGOx_WRBUF_HIT",
+        "TGOx_WRBUF_MISS",
+        "TGOx_WRBUF_NO_FREE_LINE_STALL",
+        "TGOx_WRBUF_NO_AXI_ID_STALL",
+        "TGOx_WRBUF_AXI_STALL",
+        "",
+        "",
+        "",
+        "TGOx_UTLB_TRANS",
+        "TGOx_UTLB_TRANS_HIT",
+        "TGOx_UTLB_TRANS_STALL",
+        "TGOx_UTLB_TRANS_MISS_DELAY",
+        "TGOx_UTLB_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TGOx_FRAG_ACTIVE",
+        "TGOx_FRAG_PRIMITIVES",
+        "TGOx_FRAG_PRIM_RAST",
+        "TGOx_FRAG_FPK_ACTIVE",
+        "TGOx_FRAG_STARVING",
+        "TGOx_FRAG_WARPS",
+        "TGOx_FRAG_PARTIAL_WARPS",
+        "TGOx_FRAG_QUADS_RAST",
+        "TGOx_FRAG_QUADS_EZS_TEST",
+        "TGOx_FRAG_QUADS_EZS_UPDATE",
+        "TGOx_FRAG_QUADS_EZS_KILL",
+        "TGOx_FRAG_LZS_TEST",
+        "TGOx_FRAG_LZS_KILL",
+        "TGOx_WARP_REG_SIZE_64",
+        "TGOx_FRAG_PTILES",
+        "TGOx_FRAG_TRANS_ELIM",
+        "TGOx_QUAD_FPK_KILLER",
+        "TGOx_FULL_QUAD_WARPS",
+        "TGOx_COMPUTE_ACTIVE",
+        "TGOx_COMPUTE_TASKS",
+        "TGOx_COMPUTE_WARPS",
+        "TGOx_COMPUTE_STARVING",
+        "TGOx_EXEC_CORE_ACTIVE",
+        "TGOx_EXEC_ACTIVE",
+        "TGOx_EXEC_INSTR_COUNT",
+        "TGOx_EXEC_INSTR_DIVERGED",
+        "TGOx_EXEC_INSTR_STARVING",
+        "TGOx_ARITH_INSTR_SINGLE_FMA",
+        "TGOx_ARITH_INSTR_DOUBLE",
+        "TGOx_ARITH_INSTR_MSG",
+        "TGOx_ARITH_INSTR_MSG_ONLY",
+        "TGOx_TEX_MSGI_NUM_QUADS",
+        "TGOx_TEX_DFCH_NUM_PASSES",
+        "TGOx_TEX_DFCH_NUM_PASSES_MISS",
+        "TGOx_TEX_DFCH_NUM_PASSES_MIP_MAP",
+        "TGOx_TEX_TIDX_NUM_SPLIT_MIP_MAP",
+        "TGOx_TEX_TFCH_NUM_LINES_FETCHED",
+        "TGOx_TEX_TFCH_NUM_LINES_FETCHED_BLOCK_COMPRESSED",
+        "TGOx_TEX_TFCH_NUM_OPERATIONS",
+        "TGOx_TEX_FILT_NUM_OPERATIONS",
+        "TGOx_LS_MEM_READ_FULL",
+        "TGOx_LS_MEM_READ_SHORT",
+        "TGOx_LS_MEM_WRITE_FULL",
+        "TGOx_LS_MEM_WRITE_SHORT",
+        "TGOx_LS_MEM_ATOMIC",
+        "TGOx_VARY_INSTR",
+        "TGOx_VARY_SLOT_32",
+        "TGOx_VARY_SLOT_16",
+        "TGOx_ATTR_INSTR",
+        "TGOx_ARITH_INSTR_FP_MUL",
+        "TGOx_BEATS_RD_FTC",
+        "TGOx_BEATS_RD_FTC_EXT",
+        "TGOx_BEATS_RD_LSC",
+        "TGOx_BEATS_RD_LSC_EXT",
+        "TGOx_BEATS_RD_TEX",
+        "TGOx_BEATS_RD_TEX_EXT",
+        "TGOx_BEATS_RD_OTHER",
+        "TGOx_BEATS_WR_LSC_WB",
+        "TGOx_BEATS_WR_TIB",
+        "TGOx_BEATS_WR_LSC_OTHER",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TGOx_MMU_REQUESTS",
+        "TGOx_MMU_TABLE_READS_L3",
+        "TGOx_MMU_TABLE_READS_L2",
+        "TGOx_MMU_HIT_L3",
+        "TGOx_MMU_HIT_L2",
+        "TGOx_MMU_S2_REQUESTS",
+        "TGOx_MMU_S2_TABLE_READS_L3",
+        "TGOx_MMU_S2_TABLE_READS_L2",
+        "TGOx_MMU_S2_HIT_L3",
+        "TGOx_MMU_S2_HIT_L2",
+        "",
+        "",
+        "TGOx_L2_RD_MSG_IN",
+        "TGOx_L2_RD_MSG_IN_STALL",
+        "TGOx_L2_WR_MSG_IN",
+        "TGOx_L2_WR_MSG_IN_STALL",
+        "TGOx_L2_SNP_MSG_IN",
+        "TGOx_L2_SNP_MSG_IN_STALL",
+        "TGOx_L2_RD_MSG_OUT",
+        "TGOx_L2_RD_MSG_OUT_STALL",
+        "TGOx_L2_WR_MSG_OUT",
+        "TGOx_L2_ANY_LOOKUP",
+        "TGOx_L2_READ_LOOKUP",
+        "TGOx_L2_WRITE_LOOKUP",
+        "TGOx_L2_EXT_SNOOP_LOOKUP",
+        "TGOx_L2_EXT_READ",
+        "TGOx_L2_EXT_READ_NOSNP",
+        "TGOx_L2_EXT_READ_UNIQUE",
+        "TGOx_L2_EXT_READ_BEATS",
+        "TGOx_L2_EXT_AR_STALL",
+        "TGOx_L2_EXT_AR_CNT_Q1",
+        "TGOx_L2_EXT_AR_CNT_Q2",
+        "TGOx_L2_EXT_AR_CNT_Q3",
+        "TGOx_L2_EXT_RRESP_0_127",
+        "TGOx_L2_EXT_RRESP_128_191",
+        "TGOx_L2_EXT_RRESP_192_255",
+        "TGOx_L2_EXT_RRESP_256_319",
+        "TGOx_L2_EXT_RRESP_320_383",
+        "TGOx_L2_EXT_WRITE",
+        "TGOx_L2_EXT_WRITE_NOSNP_FULL",
+        "TGOx_L2_EXT_WRITE_NOSNP_PTL",
+        "TGOx_L2_EXT_WRITE_SNP_FULL",
+        "TGOx_L2_EXT_WRITE_SNP_PTL",
+        "TGOx_L2_EXT_WRITE_BEATS",
+        "TGOx_L2_EXT_W_STALL",
+        "TGOx_L2_EXT_AW_CNT_Q1",
+        "TGOx_L2_EXT_AW_CNT_Q2",
+        "TGOx_L2_EXT_AW_CNT_Q3",
+        "TGOx_L2_EXT_SNOOP",
+        "TGOx_L2_EXT_SNOOP_STALL",
+        "TGOx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TGOx_L2_EXT_SNOOP_RESP_DATA",
+        "TGOx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tTRx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TTRx_MESSAGES_SENT",
+        "TTRx_MESSAGES_RECEIVED",
+        "TTRx_GPU_ACTIVE",
+        "TTRx_IRQ_ACTIVE",
+        "TTRx_JS0_JOBS",
+        "TTRx_JS0_TASKS",
+        "TTRx_JS0_ACTIVE",
+        "TTRx_JS0_WAIT_FLUSH",
+        "TTRx_JS0_WAIT_READ",
+        "TTRx_JS0_WAIT_ISSUE",
+        "TTRx_JS0_WAIT_DEPEND",
+        "TTRx_JS0_WAIT_FINISH",
+        "TTRx_JS1_JOBS",
+        "TTRx_JS1_TASKS",
+        "TTRx_JS1_ACTIVE",
+        "TTRx_JS1_WAIT_FLUSH",
+        "TTRx_JS1_WAIT_READ",
+        "TTRx_JS1_WAIT_ISSUE",
+        "TTRx_JS1_WAIT_DEPEND",
+        "TTRx_JS1_WAIT_FINISH",
+        "TTRx_JS2_JOBS",
+        "TTRx_JS2_TASKS",
+        "TTRx_JS2_ACTIVE",
+        "TTRx_JS2_WAIT_FLUSH",
+        "TTRx_JS2_WAIT_READ",
+        "TTRx_JS2_WAIT_ISSUE",
+        "TTRx_JS2_WAIT_DEPEND",
+        "TTRx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "TTRx_CACHE_FLUSH",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TTRx_TILER_ACTIVE",
+        "TTRx_JOBS_PROCESSED",
+        "TTRx_TRIANGLES",
+        "TTRx_LINES",
+        "TTRx_POINTS",
+        "TTRx_FRONT_FACING",
+        "TTRx_BACK_FACING",
+        "TTRx_PRIM_VISIBLE",
+        "TTRx_PRIM_CULLED",
+        "TTRx_PRIM_CLIPPED",
+        "TTRx_PRIM_SAT_CULLED",
+        "TTRx_BIN_ALLOC_INIT",
+        "TTRx_BIN_ALLOC_OVERFLOW",
+        "TTRx_BUS_READ",
+        "",
+        "TTRx_BUS_WRITE",
+        "TTRx_LOADING_DESC",
+        "TTRx_IDVS_POS_SHAD_REQ",
+        "TTRx_IDVS_POS_SHAD_WAIT",
+        "TTRx_IDVS_POS_SHAD_STALL",
+        "TTRx_IDVS_POS_FIFO_FULL",
+        "TTRx_PREFETCH_STALL",
+        "TTRx_VCACHE_HIT",
+        "TTRx_VCACHE_MISS",
+        "TTRx_VCACHE_LINE_WAIT",
+        "TTRx_VFETCH_POS_READ_WAIT",
+        "TTRx_VFETCH_VERTEX_WAIT",
+        "TTRx_VFETCH_STALL",
+        "TTRx_PRIMASSY_STALL",
+        "TTRx_BBOX_GEN_STALL",
+        "TTRx_IDVS_VBU_HIT",
+        "TTRx_IDVS_VBU_MISS",
+        "TTRx_IDVS_VBU_LINE_DEALLOCATE",
+        "TTRx_IDVS_VAR_SHAD_REQ",
+        "TTRx_IDVS_VAR_SHAD_STALL",
+        "TTRx_BINNER_STALL",
+        "TTRx_ITER_STALL",
+        "TTRx_COMPRESS_MISS",
+        "TTRx_COMPRESS_STALL",
+        "TTRx_PCACHE_HIT",
+        "TTRx_PCACHE_MISS",
+        "TTRx_PCACHE_MISS_STALL",
+        "TTRx_PCACHE_EVICT_STALL",
+        "TTRx_PMGR_PTR_WR_STALL",
+        "TTRx_PMGR_PTR_RD_STALL",
+        "TTRx_PMGR_CMD_WR_STALL",
+        "TTRx_WRBUF_ACTIVE",
+        "TTRx_WRBUF_HIT",
+        "TTRx_WRBUF_MISS",
+        "TTRx_WRBUF_NO_FREE_LINE_STALL",
+        "TTRx_WRBUF_NO_AXI_ID_STALL",
+        "TTRx_WRBUF_AXI_STALL",
+        "",
+        "",
+        "",
+        "TTRx_UTLB_TRANS",
+        "TTRx_UTLB_TRANS_HIT",
+        "TTRx_UTLB_TRANS_STALL",
+        "TTRx_UTLB_TRANS_MISS_DELAY",
+        "TTRx_UTLB_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TTRx_FRAG_ACTIVE",
+        "TTRx_FRAG_PRIMITIVES_OUT",
+        "TTRx_FRAG_PRIM_RAST",
+        "TTRx_FRAG_FPK_ACTIVE",
+        "TTRx_FRAG_STARVING",
+        "TTRx_FRAG_WARPS",
+        "TTRx_FRAG_PARTIAL_QUADS_RAST",
+        "TTRx_FRAG_QUADS_RAST",
+        "TTRx_FRAG_QUADS_EZS_TEST",
+        "TTRx_FRAG_QUADS_EZS_UPDATE",
+        "TTRx_FRAG_QUADS_EZS_KILL",
+        "TTRx_FRAG_LZS_TEST",
+        "TTRx_FRAG_LZS_KILL",
+        "TTRx_WARP_REG_SIZE_64",
+        "TTRx_FRAG_PTILES",
+        "TTRx_FRAG_TRANS_ELIM",
+        "TTRx_QUAD_FPK_KILLER",
+        "TTRx_FULL_QUAD_WARPS",
+        "TTRx_COMPUTE_ACTIVE",
+        "TTRx_COMPUTE_TASKS",
+        "TTRx_COMPUTE_WARPS",
+        "TTRx_COMPUTE_STARVING",
+        "TTRx_EXEC_CORE_ACTIVE",
+        "TTRx_EXEC_INSTR_FMA",
+        "TTRx_EXEC_INSTR_CVT",
+        "TTRx_EXEC_INSTR_SFU",
+        "TTRx_EXEC_INSTR_MSG",
+        "TTRx_EXEC_INSTR_DIVERGED",
+        "TTRx_EXEC_ICACHE_MISS",
+        "TTRx_EXEC_STARVE_ARITH",
+        "TTRx_CALL_BLEND_SHADER",
+        "TTRx_TEX_MSGI_NUM_FLITS",
+        "TTRx_TEX_DFCH_CLK_STALLED",
+        "TTRx_TEX_TFCH_CLK_STALLED",
+        "TTRx_TEX_TFCH_STARVED_PENDING_DATA_FETCH",
+        "TTRx_TEX_FILT_NUM_OPERATIONS",
+        "TTRx_TEX_FILT_NUM_FXR_OPERATIONS",
+        "TTRx_TEX_FILT_NUM_FST_OPERATIONS",
+        "TTRx_TEX_MSGO_NUM_MSG",
+        "TTRx_TEX_MSGO_NUM_FLITS",
+        "TTRx_LS_MEM_READ_FULL",
+        "TTRx_LS_MEM_READ_SHORT",
+        "TTRx_LS_MEM_WRITE_FULL",
+        "TTRx_LS_MEM_WRITE_SHORT",
+        "TTRx_LS_MEM_ATOMIC",
+        "TTRx_VARY_INSTR",
+        "TTRx_VARY_SLOT_32",
+        "TTRx_VARY_SLOT_16",
+        "TTRx_ATTR_INSTR",
+        "TTRx_ARITH_INSTR_FP_MUL",
+        "TTRx_BEATS_RD_FTC",
+        "TTRx_BEATS_RD_FTC_EXT",
+        "TTRx_BEATS_RD_LSC",
+        "TTRx_BEATS_RD_LSC_EXT",
+        "TTRx_BEATS_RD_TEX",
+        "TTRx_BEATS_RD_TEX_EXT",
+        "TTRx_BEATS_RD_OTHER",
+        "TTRx_BEATS_WR_LSC_OTHER",
+        "TTRx_BEATS_WR_TIB",
+        "TTRx_BEATS_WR_LSC_WB",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TTRx_MMU_REQUESTS",
+        "TTRx_MMU_TABLE_READS_L3",
+        "TTRx_MMU_TABLE_READS_L2",
+        "TTRx_MMU_HIT_L3",
+        "TTRx_MMU_HIT_L2",
+        "TTRx_MMU_S2_REQUESTS",
+        "TTRx_MMU_S2_TABLE_READS_L3",
+        "TTRx_MMU_S2_TABLE_READS_L2",
+        "TTRx_MMU_S2_HIT_L3",
+        "TTRx_MMU_S2_HIT_L2",
+        "",
+        "",
+        "TTRx_L2_RD_MSG_IN",
+        "TTRx_L2_RD_MSG_IN_STALL",
+        "TTRx_L2_WR_MSG_IN",
+        "TTRx_L2_WR_MSG_IN_STALL",
+        "TTRx_L2_SNP_MSG_IN",
+        "TTRx_L2_SNP_MSG_IN_STALL",
+        "TTRx_L2_RD_MSG_OUT",
+        "TTRx_L2_RD_MSG_OUT_STALL",
+        "TTRx_L2_WR_MSG_OUT",
+        "TTRx_L2_ANY_LOOKUP",
+        "TTRx_L2_READ_LOOKUP",
+        "TTRx_L2_WRITE_LOOKUP",
+        "TTRx_L2_EXT_SNOOP_LOOKUP",
+        "TTRx_L2_EXT_READ",
+        "TTRx_L2_EXT_READ_NOSNP",
+        "TTRx_L2_EXT_READ_UNIQUE",
+        "TTRx_L2_EXT_READ_BEATS",
+        "TTRx_L2_EXT_AR_STALL",
+        "TTRx_L2_EXT_AR_CNT_Q1",
+        "TTRx_L2_EXT_AR_CNT_Q2",
+        "TTRx_L2_EXT_AR_CNT_Q3",
+        "TTRx_L2_EXT_RRESP_0_127",
+        "TTRx_L2_EXT_RRESP_128_191",
+        "TTRx_L2_EXT_RRESP_192_255",
+        "TTRx_L2_EXT_RRESP_256_319",
+        "TTRx_L2_EXT_RRESP_320_383",
+        "TTRx_L2_EXT_WRITE",
+        "TTRx_L2_EXT_WRITE_NOSNP_FULL",
+        "TTRx_L2_EXT_WRITE_NOSNP_PTL",
+        "TTRx_L2_EXT_WRITE_SNP_FULL",
+        "TTRx_L2_EXT_WRITE_SNP_PTL",
+        "TTRx_L2_EXT_WRITE_BEATS",
+        "TTRx_L2_EXT_W_STALL",
+        "TTRx_L2_EXT_AW_CNT_Q1",
+        "TTRx_L2_EXT_AW_CNT_Q2",
+        "TTRx_L2_EXT_AW_CNT_Q3",
+        "TTRx_L2_EXT_SNOOP",
+        "TTRx_L2_EXT_SNOOP_STALL",
+        "TTRx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TTRx_L2_EXT_SNOOP_RESP_DATA",
+        "TTRx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tNAx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TNAx_MESSAGES_SENT",
+        "TNAx_MESSAGES_RECEIVED",
+        "TNAx_GPU_ACTIVE",
+        "TNAx_IRQ_ACTIVE",
+        "TNAx_JS0_JOBS",
+        "TNAx_JS0_TASKS",
+        "TNAx_JS0_ACTIVE",
+        "TNAx_JS0_WAIT_FLUSH",
+        "TNAx_JS0_WAIT_READ",
+        "TNAx_JS0_WAIT_ISSUE",
+        "TNAx_JS0_WAIT_DEPEND",
+        "TNAx_JS0_WAIT_FINISH",
+        "TNAx_JS1_JOBS",
+        "TNAx_JS1_TASKS",
+        "TNAx_JS1_ACTIVE",
+        "TNAx_JS1_WAIT_FLUSH",
+        "TNAx_JS1_WAIT_READ",
+        "TNAx_JS1_WAIT_ISSUE",
+        "TNAx_JS1_WAIT_DEPEND",
+        "TNAx_JS1_WAIT_FINISH",
+        "TNAx_JS2_JOBS",
+        "TNAx_JS2_TASKS",
+        "TNAx_JS2_ACTIVE",
+        "TNAx_JS2_WAIT_FLUSH",
+        "TNAx_JS2_WAIT_READ",
+        "TNAx_JS2_WAIT_ISSUE",
+        "TNAx_JS2_WAIT_DEPEND",
+        "TNAx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "TNAx_CACHE_FLUSH",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TNAx_TILER_ACTIVE",
+        "TNAx_JOBS_PROCESSED",
+        "TNAx_TRIANGLES",
+        "TNAx_LINES",
+        "TNAx_POINTS",
+        "TNAx_FRONT_FACING",
+        "TNAx_BACK_FACING",
+        "TNAx_PRIM_VISIBLE",
+        "TNAx_PRIM_CULLED",
+        "TNAx_PRIM_CLIPPED",
+        "TNAx_PRIM_SAT_CULLED",
+        "TNAx_BIN_ALLOC_INIT",
+        "TNAx_BIN_ALLOC_OVERFLOW",
+        "TNAx_BUS_READ",
+        "",
+        "TNAx_BUS_WRITE",
+        "TNAx_LOADING_DESC",
+        "TNAx_IDVS_POS_SHAD_REQ",
+        "TNAx_IDVS_POS_SHAD_WAIT",
+        "TNAx_IDVS_POS_SHAD_STALL",
+        "TNAx_IDVS_POS_FIFO_FULL",
+        "TNAx_PREFETCH_STALL",
+        "TNAx_VCACHE_HIT",
+        "TNAx_VCACHE_MISS",
+        "TNAx_VCACHE_LINE_WAIT",
+        "TNAx_VFETCH_POS_READ_WAIT",
+        "TNAx_VFETCH_VERTEX_WAIT",
+        "TNAx_VFETCH_STALL",
+        "TNAx_PRIMASSY_STALL",
+        "TNAx_BBOX_GEN_STALL",
+        "TNAx_IDVS_VBU_HIT",
+        "TNAx_IDVS_VBU_MISS",
+        "TNAx_IDVS_VBU_LINE_DEALLOCATE",
+        "TNAx_IDVS_VAR_SHAD_REQ",
+        "TNAx_IDVS_VAR_SHAD_STALL",
+        "TNAx_BINNER_STALL",
+        "TNAx_ITER_STALL",
+        "TNAx_COMPRESS_MISS",
+        "TNAx_COMPRESS_STALL",
+        "TNAx_PCACHE_HIT",
+        "TNAx_PCACHE_MISS",
+        "TNAx_PCACHE_MISS_STALL",
+        "TNAx_PCACHE_EVICT_STALL",
+        "TNAx_PMGR_PTR_WR_STALL",
+        "TNAx_PMGR_PTR_RD_STALL",
+        "TNAx_PMGR_CMD_WR_STALL",
+        "TNAx_WRBUF_ACTIVE",
+        "TNAx_WRBUF_HIT",
+        "TNAx_WRBUF_MISS",
+        "TNAx_WRBUF_NO_FREE_LINE_STALL",
+        "TNAx_WRBUF_NO_AXI_ID_STALL",
+        "TNAx_WRBUF_AXI_STALL",
+        "",
+        "",
+        "",
+        "TNAx_UTLB_TRANS",
+        "TNAx_UTLB_TRANS_HIT",
+        "TNAx_UTLB_TRANS_STALL",
+        "TNAx_UTLB_TRANS_MISS_DELAY",
+        "TNAx_UTLB_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TNAx_FRAG_ACTIVE",
+        "TNAx_FRAG_PRIMITIVES_OUT",
+        "TNAx_FRAG_PRIM_RAST",
+        "TNAx_FRAG_FPK_ACTIVE",
+        "TNAx_FRAG_STARVING",
+        "TNAx_FRAG_WARPS",
+        "TNAx_FRAG_PARTIAL_QUADS_RAST",
+        "TNAx_FRAG_QUADS_RAST",
+        "TNAx_FRAG_QUADS_EZS_TEST",
+        "TNAx_FRAG_QUADS_EZS_UPDATE",
+        "TNAx_FRAG_QUADS_EZS_KILL",
+        "TNAx_FRAG_LZS_TEST",
+        "TNAx_FRAG_LZS_KILL",
+        "TNAx_WARP_REG_SIZE_64",
+        "TNAx_FRAG_PTILES",
+        "TNAx_FRAG_TRANS_ELIM",
+        "TNAx_QUAD_FPK_KILLER",
+        "TNAx_FULL_QUAD_WARPS",
+        "TNAx_COMPUTE_ACTIVE",
+        "TNAx_COMPUTE_TASKS",
+        "TNAx_COMPUTE_WARPS",
+        "TNAx_COMPUTE_STARVING",
+        "TNAx_EXEC_CORE_ACTIVE",
+        "TNAx_EXEC_INSTR_FMA",
+        "TNAx_EXEC_INSTR_CVT",
+        "TNAx_EXEC_INSTR_SFU",
+        "TNAx_EXEC_INSTR_MSG",
+        "TNAx_EXEC_INSTR_DIVERGED",
+        "TNAx_EXEC_ICACHE_MISS",
+        "TNAx_EXEC_STARVE_ARITH",
+        "TNAx_CALL_BLEND_SHADER",
+        "TNAx_TEX_MSGI_NUM_FLITS",
+        "TNAx_TEX_DFCH_CLK_STALLED",
+        "TNAx_TEX_TFCH_CLK_STALLED",
+        "TNAx_TEX_TFCH_STARVED_PENDING_DATA_FETCH",
+        "TNAx_TEX_FILT_NUM_OPERATIONS",
+        "TNAx_TEX_FILT_NUM_FXR_OPERATIONS",
+        "TNAx_TEX_FILT_NUM_FST_OPERATIONS",
+        "TNAx_TEX_MSGO_NUM_MSG",
+        "TNAx_TEX_MSGO_NUM_FLITS",
+        "TNAx_LS_MEM_READ_FULL",
+        "TNAx_LS_MEM_READ_SHORT",
+        "TNAx_LS_MEM_WRITE_FULL",
+        "TNAx_LS_MEM_WRITE_SHORT",
+        "TNAx_LS_MEM_ATOMIC",
+        "TNAx_VARY_INSTR",
+        "TNAx_VARY_SLOT_32",
+        "TNAx_VARY_SLOT_16",
+        "TNAx_ATTR_INSTR",
+        "TNAx_ARITH_INSTR_FP_MUL",
+        "TNAx_BEATS_RD_FTC",
+        "TNAx_BEATS_RD_FTC_EXT",
+        "TNAx_BEATS_RD_LSC",
+        "TNAx_BEATS_RD_LSC_EXT",
+        "TNAx_BEATS_RD_TEX",
+        "TNAx_BEATS_RD_TEX_EXT",
+        "TNAx_BEATS_RD_OTHER",
+        "TNAx_BEATS_WR_LSC_OTHER",
+        "TNAx_BEATS_WR_TIB",
+        "TNAx_BEATS_WR_LSC_WB",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TNAx_MMU_REQUESTS",
+        "TNAx_MMU_TABLE_READS_L3",
+        "TNAx_MMU_TABLE_READS_L2",
+        "TNAx_MMU_HIT_L3",
+        "TNAx_MMU_HIT_L2",
+        "TNAx_MMU_S2_REQUESTS",
+        "TNAx_MMU_S2_TABLE_READS_L3",
+        "TNAx_MMU_S2_TABLE_READS_L2",
+        "TNAx_MMU_S2_HIT_L3",
+        "TNAx_MMU_S2_HIT_L2",
+        "",
+        "",
+        "TNAx_L2_RD_MSG_IN",
+        "TNAx_L2_RD_MSG_IN_STALL",
+        "TNAx_L2_WR_MSG_IN",
+        "TNAx_L2_WR_MSG_IN_STALL",
+        "TNAx_L2_SNP_MSG_IN",
+        "TNAx_L2_SNP_MSG_IN_STALL",
+        "TNAx_L2_RD_MSG_OUT",
+        "TNAx_L2_RD_MSG_OUT_STALL",
+        "TNAx_L2_WR_MSG_OUT",
+        "TNAx_L2_ANY_LOOKUP",
+        "TNAx_L2_READ_LOOKUP",
+        "TNAx_L2_WRITE_LOOKUP",
+        "TNAx_L2_EXT_SNOOP_LOOKUP",
+        "TNAx_L2_EXT_READ",
+        "TNAx_L2_EXT_READ_NOSNP",
+        "TNAx_L2_EXT_READ_UNIQUE",
+        "TNAx_L2_EXT_READ_BEATS",
+        "TNAx_L2_EXT_AR_STALL",
+        "TNAx_L2_EXT_AR_CNT_Q1",
+        "TNAx_L2_EXT_AR_CNT_Q2",
+        "TNAx_L2_EXT_AR_CNT_Q3",
+        "TNAx_L2_EXT_RRESP_0_127",
+        "TNAx_L2_EXT_RRESP_128_191",
+        "TNAx_L2_EXT_RRESP_192_255",
+        "TNAx_L2_EXT_RRESP_256_319",
+        "TNAx_L2_EXT_RRESP_320_383",
+        "TNAx_L2_EXT_WRITE",
+        "TNAx_L2_EXT_WRITE_NOSNP_FULL",
+        "TNAx_L2_EXT_WRITE_NOSNP_PTL",
+        "TNAx_L2_EXT_WRITE_SNP_FULL",
+        "TNAx_L2_EXT_WRITE_SNP_PTL",
+        "TNAx_L2_EXT_WRITE_BEATS",
+        "TNAx_L2_EXT_W_STALL",
+        "TNAx_L2_EXT_AW_CNT_Q1",
+        "TNAx_L2_EXT_AW_CNT_Q2",
+        "TNAx_L2_EXT_AW_CNT_Q3",
+        "TNAx_L2_EXT_SNOOP",
+        "TNAx_L2_EXT_SNOOP_STALL",
+        "TNAx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TNAx_L2_EXT_SNOOP_RESP_DATA",
+        "TNAx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tOTx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TOTx_MESSAGES_SENT",
+        "TOTx_MESSAGES_RECEIVED",
+        "TOTx_GPU_ACTIVE",
+        "TOTx_IRQ_ACTIVE",
+        "TOTx_JS0_JOBS",
+        "TOTx_JS0_TASKS",
+        "TOTx_JS0_ACTIVE",
+        "TOTx_JS0_WAIT_FLUSH",
+        "TOTx_JS0_WAIT_READ",
+        "TOTx_JS0_WAIT_ISSUE",
+        "TOTx_JS0_WAIT_DEPEND",
+        "TOTx_JS0_WAIT_FINISH",
+        "TOTx_JS1_JOBS",
+        "TOTx_JS1_TASKS",
+        "TOTx_JS1_ACTIVE",
+        "TOTx_JS1_WAIT_FLUSH",
+        "TOTx_JS1_WAIT_READ",
+        "TOTx_JS1_WAIT_ISSUE",
+        "TOTx_JS1_WAIT_DEPEND",
+        "TOTx_JS1_WAIT_FINISH",
+        "TOTx_JS2_JOBS",
+        "TOTx_JS2_TASKS",
+        "TOTx_JS2_ACTIVE",
+        "TOTx_JS2_WAIT_FLUSH",
+        "TOTx_JS2_WAIT_READ",
+        "TOTx_JS2_WAIT_ISSUE",
+        "TOTx_JS2_WAIT_DEPEND",
+        "TOTx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "TOTx_CACHE_FLUSH",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TOTx_TILER_ACTIVE",
+        "TOTx_JOBS_PROCESSED",
+        "TOTx_TRIANGLES",
+        "TOTx_LINES",
+        "TOTx_POINTS",
+        "TOTx_FRONT_FACING",
+        "TOTx_BACK_FACING",
+        "TOTx_PRIM_VISIBLE",
+        "TOTx_PRIM_CULLED",
+        "TOTx_PRIM_CLIPPED",
+        "TOTx_PRIM_SAT_CULLED",
+        "TOTx_BIN_ALLOC_INIT",
+        "TOTx_BIN_ALLOC_OVERFLOW",
+        "TOTx_BUS_READ",
+        "",
+        "TOTx_BUS_WRITE",
+        "TOTx_LOADING_DESC",
+        "TOTx_IDVS_POS_SHAD_REQ",
+        "TOTx_IDVS_POS_SHAD_WAIT",
+        "TOTx_IDVS_POS_SHAD_STALL",
+        "TOTx_IDVS_POS_FIFO_FULL",
+        "TOTx_PREFETCH_STALL",
+        "TOTx_VCACHE_HIT",
+        "TOTx_VCACHE_MISS",
+        "TOTx_VCACHE_LINE_WAIT",
+        "TOTx_VFETCH_POS_READ_WAIT",
+        "TOTx_VFETCH_VERTEX_WAIT",
+        "TOTx_VFETCH_STALL",
+        "TOTx_PRIMASSY_STALL",
+        "TOTx_BBOX_GEN_STALL",
+        "TOTx_IDVS_VBU_HIT",
+        "TOTx_IDVS_VBU_MISS",
+        "TOTx_IDVS_VBU_LINE_DEALLOCATE",
+        "TOTx_IDVS_VAR_SHAD_REQ",
+        "TOTx_IDVS_VAR_SHAD_STALL",
+        "TOTx_BINNER_STALL",
+        "TOTx_ITER_STALL",
+        "TOTx_COMPRESS_MISS",
+        "TOTx_COMPRESS_STALL",
+        "TOTx_PCACHE_HIT",
+        "TOTx_PCACHE_MISS",
+        "TOTx_PCACHE_MISS_STALL",
+        "TOTx_PCACHE_EVICT_STALL",
+        "TOTx_PMGR_PTR_WR_STALL",
+        "TOTx_PMGR_PTR_RD_STALL",
+        "TOTx_PMGR_CMD_WR_STALL",
+        "TOTx_WRBUF_ACTIVE",
+        "TOTx_WRBUF_HIT",
+        "TOTx_WRBUF_MISS",
+        "TOTx_WRBUF_NO_FREE_LINE_STALL",
+        "TOTx_WRBUF_NO_AXI_ID_STALL",
+        "TOTx_WRBUF_AXI_STALL",
+        "TOTx_UTLB0_TRANS",
+        "TOTx_UTLB0_TRANS_HIT",
+        "TOTx_UTLB0_TRANS_STALL",
+        "TOTx_UTLB0_MMU_REQ",
+        "TOTx_UTLB1_TRANS",
+        "TOTx_UTLB1_TRANS_HIT",
+        "TOTx_UTLB1_TRANS_STALL",
+        "TOTx_UTLB1_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TOTx_FRAG_ACTIVE",
+        "TOTx_FRAG_PRIMITIVES_OUT",
+        "TOTx_FRAG_PRIM_RAST",
+        "TOTx_FRAG_FPK_ACTIVE",
+        "TOTx_FRAG_STARVING",
+        "TOTx_FRAG_WARPS",
+        "TOTx_FRAG_PARTIAL_QUADS_RAST",
+        "TOTx_FRAG_QUADS_RAST",
+        "TOTx_FRAG_QUADS_EZS_TEST",
+        "TOTx_FRAG_QUADS_EZS_UPDATE",
+        "TOTx_FRAG_QUADS_EZS_KILL",
+        "TOTx_FRAG_LZS_TEST",
+        "TOTx_FRAG_LZS_KILL",
+        "TOTx_WARP_REG_SIZE_64",
+        "TOTx_FRAG_PTILES",
+        "TOTx_FRAG_TRANS_ELIM",
+        "TOTx_QUAD_FPK_KILLER",
+        "TOTx_FULL_QUAD_WARPS",
+        "TOTx_COMPUTE_ACTIVE",
+        "TOTx_COMPUTE_TASKS",
+        "TOTx_COMPUTE_WARPS",
+        "TOTx_COMPUTE_STARVING",
+        "TOTx_EXEC_CORE_ACTIVE",
+        "TOTx_EXEC_INSTR_FMA",
+        "TOTx_EXEC_INSTR_CVT",
+        "TOTx_EXEC_INSTR_SFU",
+        "TOTx_EXEC_INSTR_MSG",
+        "TOTx_EXEC_INSTR_DIVERGED",
+        "TOTx_EXEC_ICACHE_MISS",
+        "TOTx_EXEC_STARVE_ARITH",
+        "TOTx_CALL_BLEND_SHADER",
+        "TOTx_TEX_MSGI_NUM_FLITS",
+        "TOTx_TEX_DFCH_CLK_STALLED",
+        "TOTx_TEX_TFCH_CLK_STALLED",
+        "TOTx_TEX_TFCH_STARVED_PENDING_DATA_FETCH",
+        "TOTx_TEX_FILT_NUM_OPERATIONS",
+        "TOTx_TEX_FILT_NUM_FXR_OPERATIONS",
+        "TOTx_TEX_FILT_NUM_FST_OPERATIONS",
+        "TOTx_TEX_MSGO_NUM_MSG",
+        "TOTx_TEX_MSGO_NUM_FLITS",
+        "TOTx_LS_MEM_READ_FULL",
+        "TOTx_LS_MEM_READ_SHORT",
+        "TOTx_LS_MEM_WRITE_FULL",
+        "TOTx_LS_MEM_WRITE_SHORT",
+        "TOTx_LS_MEM_ATOMIC",
+        "TOTx_VARY_INSTR",
+        "TOTx_VARY_SLOT_32",
+        "TOTx_VARY_SLOT_16",
+        "TOTx_ATTR_INSTR",
+        "TOTx_SHADER_CORE_ACTIVE",
+        "TOTx_BEATS_RD_FTC",
+        "TOTx_BEATS_RD_FTC_EXT",
+        "TOTx_BEATS_RD_LSC",
+        "TOTx_BEATS_RD_LSC_EXT",
+        "TOTx_BEATS_RD_TEX",
+        "TOTx_BEATS_RD_TEX_EXT",
+        "TOTx_BEATS_RD_OTHER",
+        "TOTx_BEATS_WR_LSC_OTHER",
+        "TOTx_BEATS_WR_TIB",
+        "TOTx_BEATS_WR_LSC_WB",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TOTx_MMU_REQUESTS",
+        "TOTx_MMU_TABLE_READS_L3",
+        "TOTx_MMU_TABLE_READS_L2",
+        "TOTx_MMU_HIT_L3",
+        "TOTx_MMU_HIT_L2",
+        "TOTx_MMU_S2_REQUESTS",
+        "TOTx_MMU_S2_TABLE_READS_L3",
+        "TOTx_MMU_S2_TABLE_READS_L2",
+        "TOTx_MMU_S2_HIT_L3",
+        "TOTx_MMU_S2_HIT_L2",
+        "",
+        "",
+        "TOTx_L2_RD_MSG_IN",
+        "TOTx_L2_RD_MSG_IN_STALL",
+        "TOTx_L2_WR_MSG_IN",
+        "TOTx_L2_WR_MSG_IN_STALL",
+        "TOTx_L2_SNP_MSG_IN",
+        "TOTx_L2_SNP_MSG_IN_STALL",
+        "TOTx_L2_RD_MSG_OUT",
+        "TOTx_L2_RD_MSG_OUT_STALL",
+        "TOTx_L2_WR_MSG_OUT",
+        "TOTx_L2_ANY_LOOKUP",
+        "TOTx_L2_READ_LOOKUP",
+        "TOTx_L2_WRITE_LOOKUP",
+        "TOTx_L2_EXT_SNOOP_LOOKUP",
+        "TOTx_L2_EXT_READ",
+        "TOTx_L2_EXT_READ_NOSNP",
+        "TOTx_L2_EXT_READ_UNIQUE",
+        "TOTx_L2_EXT_READ_BEATS",
+        "TOTx_L2_EXT_AR_STALL",
+        "TOTx_L2_EXT_AR_CNT_Q1",
+        "TOTx_L2_EXT_AR_CNT_Q2",
+        "TOTx_L2_EXT_AR_CNT_Q3",
+        "TOTx_L2_EXT_RRESP_0_127",
+        "TOTx_L2_EXT_RRESP_128_191",
+        "TOTx_L2_EXT_RRESP_192_255",
+        "TOTx_L2_EXT_RRESP_256_319",
+        "TOTx_L2_EXT_RRESP_320_383",
+        "TOTx_L2_EXT_WRITE",
+        "TOTx_L2_EXT_WRITE_NOSNP_FULL",
+        "TOTx_L2_EXT_WRITE_NOSNP_PTL",
+        "TOTx_L2_EXT_WRITE_SNP_FULL",
+        "TOTx_L2_EXT_WRITE_SNP_PTL",
+        "TOTx_L2_EXT_WRITE_BEATS",
+        "TOTx_L2_EXT_W_STALL",
+        "TOTx_L2_EXT_AW_CNT_Q1",
+        "TOTx_L2_EXT_AW_CNT_Q2",
+        "TOTx_L2_EXT_AW_CNT_Q3",
+        "TOTx_L2_EXT_SNOOP",
+        "TOTx_L2_EXT_SNOOP_STALL",
+        "TOTx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TOTx_L2_EXT_SNOOP_RESP_DATA",
+        "TOTx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+    static const char * const hardware_counters_mali_tBOx[] = {
+        /* Job Manager */
+        "",
+        "",
+        "",
+        "",
+        "TBOx_MESSAGES_SENT",
+        "TBOx_MESSAGES_RECEIVED",
+        "TBOx_GPU_ACTIVE",
+        "TBOx_IRQ_ACTIVE",
+        "TBOx_JS0_JOBS",
+        "TBOx_JS0_TASKS",
+        "TBOx_JS0_ACTIVE",
+        "TBOx_JS0_WAIT_FLUSH",
+        "TBOx_JS0_WAIT_READ",
+        "TBOx_JS0_WAIT_ISSUE",
+        "TBOx_JS0_WAIT_DEPEND",
+        "TBOx_JS0_WAIT_FINISH",
+        "TBOx_JS1_JOBS",
+        "TBOx_JS1_TASKS",
+        "TBOx_JS1_ACTIVE",
+        "TBOx_JS1_WAIT_FLUSH",
+        "TBOx_JS1_WAIT_READ",
+        "TBOx_JS1_WAIT_ISSUE",
+        "TBOx_JS1_WAIT_DEPEND",
+        "TBOx_JS1_WAIT_FINISH",
+        "TBOx_JS2_JOBS",
+        "TBOx_JS2_TASKS",
+        "TBOx_JS2_ACTIVE",
+        "TBOx_JS2_WAIT_FLUSH",
+        "TBOx_JS2_WAIT_READ",
+        "TBOx_JS2_WAIT_ISSUE",
+        "TBOx_JS2_WAIT_DEPEND",
+        "TBOx_JS2_WAIT_FINISH",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "TBOx_CACHE_FLUSH",
+
+        /* Tiler */
+        "",
+        "",
+        "",
+        "",
+        "TBOx_TILER_ACTIVE",
+        "TBOx_JOBS_PROCESSED",
+        "TBOx_TRIANGLES",
+        "TBOx_LINES",
+        "TBOx_POINTS",
+        "TBOx_FRONT_FACING",
+        "TBOx_BACK_FACING",
+        "TBOx_PRIM_VISIBLE",
+        "TBOx_PRIM_CULLED",
+        "TBOx_PRIM_CLIPPED",
+        "TBOx_PRIM_SAT_CULLED",
+        "TBOx_BIN_ALLOC_INIT",
+        "TBOx_BIN_ALLOC_OVERFLOW",
+        "TBOx_BUS_READ",
+        "",
+        "TBOx_BUS_WRITE",
+        "TBOx_LOADING_DESC",
+        "TBOx_IDVS_POS_SHAD_REQ",
+        "TBOx_IDVS_POS_SHAD_WAIT",
+        "TBOx_IDVS_POS_SHAD_STALL",
+        "TBOx_IDVS_POS_FIFO_FULL",
+        "TBOx_PREFETCH_STALL",
+        "TBOx_VCACHE_HIT",
+        "TBOx_VCACHE_MISS",
+        "TBOx_VCACHE_LINE_WAIT",
+        "TBOx_VFETCH_POS_READ_WAIT",
+        "TBOx_VFETCH_VERTEX_WAIT",
+        "TBOx_VFETCH_STALL",
+        "TBOx_PRIMASSY_STALL",
+        "TBOx_BBOX_GEN_STALL",
+        "TBOx_IDVS_VBU_HIT",
+        "TBOx_IDVS_VBU_MISS",
+        "TBOx_IDVS_VBU_LINE_DEALLOCATE",
+        "TBOx_IDVS_VAR_SHAD_REQ",
+        "TBOx_IDVS_VAR_SHAD_STALL",
+        "TBOx_BINNER_STALL",
+        "TBOx_ITER_STALL",
+        "TBOx_COMPRESS_MISS",
+        "TBOx_COMPRESS_STALL",
+        "TBOx_PCACHE_HIT",
+        "TBOx_PCACHE_MISS",
+        "TBOx_PCACHE_MISS_STALL",
+        "TBOx_PCACHE_EVICT_STALL",
+        "TBOx_PMGR_PTR_WR_STALL",
+        "TBOx_PMGR_PTR_RD_STALL",
+        "TBOx_PMGR_CMD_WR_STALL",
+        "TBOx_WRBUF_ACTIVE",
+        "TBOx_WRBUF_HIT",
+        "TBOx_WRBUF_MISS",
+        "TBOx_WRBUF_NO_FREE_LINE_STALL",
+        "TBOx_WRBUF_NO_AXI_ID_STALL",
+        "TBOx_WRBUF_AXI_STALL",
+        "TBOx_UTLB0_TRANS",
+        "TBOx_UTLB0_TRANS_HIT",
+        "TBOx_UTLB0_TRANS_STALL",
+        "TBOx_UTLB0_MMU_REQ",
+        "TBOx_UTLB1_TRANS",
+        "TBOx_UTLB1_TRANS_HIT",
+        "TBOx_UTLB1_TRANS_STALL",
+        "TBOx_UTLB1_MMU_REQ",
+
+        /* Shader Core */
+        "",
+        "",
+        "",
+        "",
+        "TBOx_FRAG_ACTIVE",
+        "TBOx_FRAG_PRIMITIVES_OUT",
+        "TBOx_FRAG_PRIM_RAST",
+        "TBOx_FRAG_FPK_ACTIVE",
+        "TBOx_FRAG_STARVING",
+        "TBOx_FRAG_WARPS",
+        "TBOx_FRAG_PARTIAL_QUADS_RAST",
+        "TBOx_FRAG_QUADS_RAST",
+        "TBOx_FRAG_QUADS_EZS_TEST",
+        "TBOx_FRAG_QUADS_EZS_UPDATE",
+        "TBOx_FRAG_QUADS_EZS_KILL",
+        "TBOx_FRAG_LZS_TEST",
+        "TBOx_FRAG_LZS_KILL",
+        "TBOx_WARP_REG_SIZE_64",
+        "TBOx_FRAG_PTILES",
+        "TBOx_FRAG_TRANS_ELIM",
+        "TBOx_QUAD_FPK_KILLER",
+        "TBOx_FULL_QUAD_WARPS",
+        "TBOx_COMPUTE_ACTIVE",
+        "TBOx_COMPUTE_TASKS",
+        "TBOx_COMPUTE_WARPS",
+        "TBOx_COMPUTE_STARVING",
+        "TBOx_EXEC_CORE_ACTIVE",
+        "TBOx_EXEC_INSTR_FMA",
+        "TBOx_EXEC_INSTR_CVT",
+        "TBOx_EXEC_INSTR_SFU",
+        "TBOx_EXEC_INSTR_MSG",
+        "TBOx_EXEC_INSTR_DIVERGED",
+        "TBOx_EXEC_ICACHE_MISS",
+        "TBOx_EXEC_STARVE_ARITH",
+        "TBOx_CALL_BLEND_SHADER",
+        "TBOx_TEX_MSGI_NUM_FLITS",
+        "TBOx_TEX_DFCH_CLK_STALLED",
+        "TBOx_TEX_TFCH_CLK_STALLED",
+        "TBOx_TEX_TFCH_STARVED_PENDING_DATA_FETCH",
+        "TBOx_TEX_FILT_NUM_OPERATIONS",
+        "TBOx_TEX_FILT_NUM_FXR_OPERATIONS",
+        "TBOx_TEX_FILT_NUM_FST_OPERATIONS",
+        "TBOx_TEX_MSGO_NUM_MSG",
+        "TBOx_TEX_MSGO_NUM_FLITS",
+        "TBOx_LS_MEM_READ_FULL",
+        "TBOx_LS_MEM_READ_SHORT",
+        "TBOx_LS_MEM_WRITE_FULL",
+        "TBOx_LS_MEM_WRITE_SHORT",
+        "TBOx_LS_MEM_ATOMIC",
+        "TBOx_VARY_INSTR",
+        "TBOx_VARY_SLOT_32",
+        "TBOx_VARY_SLOT_16",
+        "TBOx_ATTR_INSTR",
+        "TBOx_SHADER_CORE_ACTIVE",
+        "TBOx_BEATS_RD_FTC",
+        "TBOx_BEATS_RD_FTC_EXT",
+        "TBOx_BEATS_RD_LSC",
+        "TBOx_BEATS_RD_LSC_EXT",
+        "TBOx_BEATS_RD_TEX",
+        "TBOx_BEATS_RD_TEX_EXT",
+        "TBOx_BEATS_RD_OTHER",
+        "TBOx_BEATS_WR_LSC_OTHER",
+        "TBOx_BEATS_WR_TIB",
+        "TBOx_BEATS_WR_LSC_WB",
+
+        /* L2 and MMU */
+        "",
+        "",
+        "",
+        "",
+        "TBOx_MMU_REQUESTS",
+        "TBOx_MMU_TABLE_READS_L3",
+        "TBOx_MMU_TABLE_READS_L2",
+        "TBOx_MMU_HIT_L3",
+        "TBOx_MMU_HIT_L2",
+        "TBOx_MMU_S2_REQUESTS",
+        "TBOx_MMU_S2_TABLE_READS_L3",
+        "TBOx_MMU_S2_TABLE_READS_L2",
+        "TBOx_MMU_S2_HIT_L3",
+        "TBOx_MMU_S2_HIT_L2",
+        "",
+        "",
+        "TBOx_L2_RD_MSG_IN",
+        "TBOx_L2_RD_MSG_IN_STALL",
+        "TBOx_L2_WR_MSG_IN",
+        "TBOx_L2_WR_MSG_IN_STALL",
+        "TBOx_L2_SNP_MSG_IN",
+        "TBOx_L2_SNP_MSG_IN_STALL",
+        "TBOx_L2_RD_MSG_OUT",
+        "TBOx_L2_RD_MSG_OUT_STALL",
+        "TBOx_L2_WR_MSG_OUT",
+        "TBOx_L2_ANY_LOOKUP",
+        "TBOx_L2_READ_LOOKUP",
+        "TBOx_L2_WRITE_LOOKUP",
+        "TBOx_L2_EXT_SNOOP_LOOKUP",
+        "TBOx_L2_EXT_READ",
+        "TBOx_L2_EXT_READ_NOSNP",
+        "TBOx_L2_EXT_READ_UNIQUE",
+        "TBOx_L2_EXT_READ_BEATS",
+        "TBOx_L2_EXT_AR_STALL",
+        "TBOx_L2_EXT_AR_CNT_Q1",
+        "TBOx_L2_EXT_AR_CNT_Q2",
+        "TBOx_L2_EXT_AR_CNT_Q3",
+        "TBOx_L2_EXT_RRESP_0_127",
+        "TBOx_L2_EXT_RRESP_128_191",
+        "TBOx_L2_EXT_RRESP_192_255",
+        "TBOx_L2_EXT_RRESP_256_319",
+        "TBOx_L2_EXT_RRESP_320_383",
+        "TBOx_L2_EXT_WRITE",
+        "TBOx_L2_EXT_WRITE_NOSNP_FULL",
+        "TBOx_L2_EXT_WRITE_NOSNP_PTL",
+        "TBOx_L2_EXT_WRITE_SNP_FULL",
+        "TBOx_L2_EXT_WRITE_SNP_PTL",
+        "TBOx_L2_EXT_WRITE_BEATS",
+        "TBOx_L2_EXT_W_STALL",
+        "TBOx_L2_EXT_AW_CNT_Q1",
+        "TBOx_L2_EXT_AW_CNT_Q2",
+        "TBOx_L2_EXT_AW_CNT_Q3",
+        "TBOx_L2_EXT_SNOOP",
+        "TBOx_L2_EXT_SNOOP_STALL",
+        "TBOx_L2_EXT_SNOOP_RESP_CLEAN",
+        "TBOx_L2_EXT_SNOOP_RESP_DATA",
+        "TBOx_L2_EXT_SNOOP_INTERNAL",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+    };
+
+enum // GPU product-id masks and product-id values referenced by the products[] table below.
+{
+	/* Product id masks for old and new versions of the id field. NB: the T60x must be tested before anything else as it could exceptionally be
+         * treated as a new style of id with product code 0x6006 (0x6956 & 0xf00f == 0x6006, which has the same 0x6... form as the
+         * new-style TMIX/THEX ids). */
+	PRODUCT_ID_MASK_OLD = 0xffff, // keeps all 16 low bits of the id
+	PRODUCT_ID_MASK_NEW = 0xf00f, // keeps only the top and bottom nibbles of the 16-bit id
+	/* Old style product ids */
+	PRODUCT_ID_T60X = 0x6956,
+	PRODUCT_ID_T62X = 0x0620,
+	PRODUCT_ID_T72X = 0x0720,
+	PRODUCT_ID_T76X = 0x0750,
+	PRODUCT_ID_T82X = 0x0820,
+	PRODUCT_ID_T83X = 0x0830,
+	PRODUCT_ID_T86X = 0x0860,
+	PRODUCT_ID_TFRX = 0x0880, // paired with the t88x name table in products[]
+	/* New style product ids */
+	PRODUCT_ID_TMIX = 0x6000,
+	PRODUCT_ID_THEX = 0x6001,
+	PRODUCT_ID_TSIX = 0x7000,
+	PRODUCT_ID_TNOX = 0x7001,
+	PRODUCT_ID_TGOX = 0x7002, // NOTE(review): no row in products[] uses this id - confirm whether intentional
+	PRODUCT_ID_TDVX = 0x7003, // NOTE(review): no row in products[] uses this id - confirm whether intentional
+	PRODUCT_ID_TTRX = 0x9000,
+	PRODUCT_ID_TNAXa = 0x9001, // both TNAX ids are paired with the same tNAx name table in products[]
+	PRODUCT_ID_TNAXb = 0x9003,
+	PRODUCT_ID_TOTX = 0x9004,
+	PRODUCT_ID_TBOX = 0x9002
+};
+
+struct CounterMapping // One row of the products[] table: a product id, the mask that goes with it, and its counter names.
+{
+	uint32_t           product_mask; // PRODUCT_ID_MASK_OLD or PRODUCT_ID_MASK_NEW in every products[] row
+	uint32_t           product_id;   // PRODUCT_ID_* value for the row
+	const char *const *names_lut;    // first element of one of the hardware_counters_mali_* name arrays
+};
+
+static const CounterMapping products[] = /* Pairs each listed product id (and the mask that goes with
+     * it) with its counter-name array. Old-style ids come first and T60X is the very first row,
+     * matching the ordering note on the product-id enum. PRODUCT_ID_TNAXa and PRODUCT_ID_TNAXb
+     * share the tNAx array.
+     * NOTE(review): PRODUCT_ID_TGOX and PRODUCT_ID_TDVX are declared in the enum but have no row
+     * here - confirm whether that is intentional.
+     * NOTE(review): the lookup code is not part of this table; presumably the first row with
+     * (gpu id & product_mask) == product_id is used - confirm against the caller. */
+    {
+        {
+            PRODUCT_ID_MASK_OLD,
+            PRODUCT_ID_T60X,
+            hardware_counters_mali_t60x,
+        },
+        {
+            PRODUCT_ID_MASK_OLD,
+            PRODUCT_ID_T62X,
+            hardware_counters_mali_t62x,
+        },
+        {
+            PRODUCT_ID_MASK_OLD,
+            PRODUCT_ID_T72X,
+            hardware_counters_mali_t72x,
+        },
+        {
+            PRODUCT_ID_MASK_OLD,
+            PRODUCT_ID_T76X,
+            hardware_counters_mali_t76x,
+        },
+        {
+            PRODUCT_ID_MASK_OLD,
+            PRODUCT_ID_T82X,
+            hardware_counters_mali_t82x,
+        },
+        {
+            PRODUCT_ID_MASK_OLD,
+            PRODUCT_ID_T83X,
+            hardware_counters_mali_t83x,
+        },
+        {
+            PRODUCT_ID_MASK_OLD,
+            PRODUCT_ID_T86X,
+            hardware_counters_mali_t86x,
+        },
+        {
+            PRODUCT_ID_MASK_OLD,
+            PRODUCT_ID_TFRX,
+            hardware_counters_mali_t88x, // the TFRX id uses the table named t88x
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_TMIX,
+            hardware_counters_mali_tMIx,
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_THEX,
+            hardware_counters_mali_tHEx,
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_TSIX,
+            hardware_counters_mali_tSIx,
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_TNOX,
+            hardware_counters_mali_tNOx,
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_TNAXa,
+            hardware_counters_mali_tNAx,
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_TNAXb,
+            hardware_counters_mali_tNAx,
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_TTRX,
+            hardware_counters_mali_tTRx,
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_TOTX,
+            hardware_counters_mali_tOTx,
+        },
+        {
+            PRODUCT_ID_MASK_NEW,
+            PRODUCT_ID_TBOX,
+            hardware_counters_mali_tBOx,
+        },
+    };
+
+enum // Compile-time element count of the products[] table.
+{
+	NUM_PRODUCTS = sizeof(products) / sizeof(products[0]) // total byte size divided by the size of one row
+};
+}        // namespace mali_userspace
+
+#endif /* DOXYGEN_SKIP_THIS */

+ 660 - 0
ThirdParty/HwcPipe/vendor/arm/mali/mali_profiler.cpp

@@ -0,0 +1,660 @@
+/*
+ * Copyright (c) 2017-2020 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "mali_profiler.h"
+
+#include "hwcpipe_log.h"
+
+#include <algorithm>
+#include <stdexcept>
+
+using mali_userspace::MALI_NAME_BLOCK_JM;
+using mali_userspace::MALI_NAME_BLOCK_MMU;
+using mali_userspace::MALI_NAME_BLOCK_SHADER;
+using mali_userspace::MALI_NAME_BLOCK_TILER;
+
+namespace hwcpipe
+{
+namespace
+{
+// Subset of GPU properties gathered by get_mali_hw_info().
+struct MaliHWInfo
+{
+	unsigned mp_count;         // number of bits set in core_mask
+	unsigned gpu_id;           // product id reported by the driver
+	unsigned r_value;          // major revision reported by the driver
+	unsigned p_value;          // minor revision reported by the driver
+	unsigned core_mask;        // OR of the core masks of every reported core group
+	unsigned l2_slices;        // number of L2 slices reported by the driver
+};
+
+// Opens the Mali device node at `path`, performs the version-check and
+// set-flags handshakes, and reads the GPU properties needed by the profiler.
+//
+// Each request is first attempted through mali_ioctl(); if that fails, the
+// corresponding KBASE_IOCTL_* request is tried instead.
+//
+// Returns the collected MaliHWInfo.
+// Throws std::runtime_error if the device cannot be opened or if both variants
+// of a request fail. The descriptor is closed on every exit path.
+MaliHWInfo get_mali_hw_info(const char *path)
+{
+	int fd = open(path, O_RDWR);        // NOLINT
+
+	if (fd < 0)
+	{
+		throw std::runtime_error("Failed to get HW info.");
+	}
+
+	// Closes the descriptor when the function is left, whether by return or by
+	// exception. Previously several error paths threw before reaching close().
+	struct FdGuard
+	{
+		int fd;
+		~FdGuard()
+		{
+			::close(fd);
+		}
+	} fd_guard{fd};
+
+	{
+		mali_userspace::kbase_uk_hwcnt_reader_version_check_args version_check_args;
+		version_check_args.header.id = mali_userspace::UKP_FUNC_ID_CHECK_VERSION;        // NOLINT
+		version_check_args.major     = 10;
+		version_check_args.minor     = 2;
+
+		if (mali_userspace::mali_ioctl(fd, version_check_args) != 0)
+		{
+			mali_userspace::kbase_ioctl_version_check _version_check_args = {0, 0};
+			if (ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &_version_check_args) < 0)
+			{
+				throw std::runtime_error("Failed to check version.");
+			}
+		}
+	}
+
+	{
+		mali_userspace::kbase_uk_hwcnt_reader_set_flags flags;        // NOLINT
+		memset(&flags, 0, sizeof(flags));
+		flags.header.id    = mali_userspace::KBASE_FUNC_SET_FLAGS;        // NOLINT
+		flags.create_flags = mali_userspace::BASE_CONTEXT_CREATE_KERNEL_FLAGS;
+
+		if (mali_userspace::mali_ioctl(fd, flags) != 0)
+		{
+			mali_userspace::kbase_ioctl_set_flags _flags = {1u << 1};
+			if (ioctl(fd, KBASE_IOCTL_SET_FLAGS, &_flags) < 0)
+			{
+				throw std::runtime_error("Failed settings flags ioctl.");
+			}
+		}
+	}
+
+	{
+		MaliHWInfo hw_info;        // NOLINT
+		memset(&hw_info, 0, sizeof(hw_info));
+		mali_userspace::kbase_uk_gpuprops props = {};
+		props.header.id                         = mali_userspace::KBASE_FUNC_GPU_PROPS_REG_DUMP;
+		if (mali_ioctl(fd, props) == 0)
+		{
+			hw_info.gpu_id  = props.props.core_props.product_id;
+			hw_info.r_value = props.props.core_props.major_revision;
+			hw_info.p_value = props.props.core_props.minor_revision;
+			for (uint32_t i = 0; i < props.props.coherency_info.num_core_groups; i++)
+				hw_info.core_mask |= props.props.coherency_info.group[i].core_mask;
+			hw_info.mp_count  = __builtin_popcountll(hw_info.core_mask);
+			hw_info.l2_slices = props.props.l2_props.num_l2_slices;
+		}
+		else
+		{
+			// First call with an empty request: the return value is used as the
+			// size of the buffer for the second call.
+			mali_userspace::kbase_ioctl_get_gpuprops get_props = {};
+			int                                      ret       = ioctl(fd, KBASE_IOCTL_GET_GPUPROPS, &get_props);
+			if (ret < 0)
+			{
+				throw std::runtime_error("Failed getting GPU properties.");
+			}
+
+			get_props.size = ret;
+			std::vector<uint8_t> buffer(ret);
+			get_props.buffer.value = buffer.data();
+			ret                    = ioctl(fd, KBASE_IOCTL_GET_GPUPROPS, &get_props);
+			if (ret < 0)
+			{
+				throw std::runtime_error("Failed getting GPU properties.");
+			}
+
+// Little-endian readers for the packed property stream.
+#define READ_U8(p) ((p)[0])
+#define READ_U16(p) (READ_U8((p)) | (uint16_t(READ_U8((p) + 1)) << 8))
+#define READ_U32(p) (READ_U16((p)) | (uint32_t(READ_U16((p) + 2)) << 16))
+#define READ_U64(p) (READ_U32((p)) | (uint64_t(READ_U32((p) + 4)) << 32))
+
+			mali_userspace::gpu_props props = {};
+
+			// The buffer is a sequence of records: a 32-bit key whose low two
+			// bits select the value width and whose remaining bits are the
+			// property type, followed by the value itself.
+			const auto *ptr  = buffer.data();
+			int         size = ret;
+			while (size > 0)
+			{
+				uint32_t type       = READ_U32(ptr);
+				uint32_t value_type = type & 3;
+				uint64_t value      = 0;
+
+				ptr += 4;
+				size -= 4;
+
+				switch (value_type)
+				{
+					case KBASE_GPUPROP_VALUE_SIZE_U8:
+						value = READ_U8(ptr);
+						ptr++;
+						size--;
+						break;
+					case KBASE_GPUPROP_VALUE_SIZE_U16:
+						value = READ_U16(ptr);
+						ptr += 2;
+						size -= 2;
+						break;
+					case KBASE_GPUPROP_VALUE_SIZE_U32:
+						value = READ_U32(ptr);
+						ptr += 4;
+						size -= 4;
+						break;
+					case KBASE_GPUPROP_VALUE_SIZE_U64:
+						value = READ_U64(ptr);
+						ptr += 8;
+						size -= 8;
+						break;
+				}
+
+				// Store the value into the matching field of `props`; the
+				// mapping table ends with an entry whose type is zero.
+				for (unsigned i = 0; mali_userspace::gpu_property_mapping[i].type; i++)
+				{
+					if (mali_userspace::gpu_property_mapping[i].type == (type >> 2))
+					{
+						auto  offset = mali_userspace::gpu_property_mapping[i].offset;
+						void *p      = reinterpret_cast<uint8_t *>(&props) + offset;
+						switch (mali_userspace::gpu_property_mapping[i].size)
+						{
+							case 1:
+								*reinterpret_cast<uint8_t *>(p) = value;
+								break;
+							case 2:
+								*reinterpret_cast<uint16_t *>(p) = value;
+								break;
+							case 4:
+								*reinterpret_cast<uint32_t *>(p) = value;
+								break;
+							case 8:
+								*reinterpret_cast<uint64_t *>(p) = value;
+								break;
+							default:
+								throw std::runtime_error("Invalid property size.");
+						}
+						break;
+					}
+				}
+			}
+
+			hw_info.gpu_id  = props.product_id;
+			hw_info.r_value = props.major_revision;
+			hw_info.p_value = props.minor_revision;
+			for (uint32_t i = 0; i < props.num_core_groups; i++)
+				hw_info.core_mask |= props.core_mask[i];
+			hw_info.mp_count  = __builtin_popcountll(hw_info.core_mask);
+			hw_info.l2_slices = props.l2_slices;
+		}
+
+		return hw_info;
+	}
+}
+}        // namespace
+
+// Callable type returning a 64-bit unsigned counter value.
+// NOTE(review): MaliProfiler also declares a class-scope MaliValueGetter that
+// returns double (mali_profiler.h); inside member functions that one presumably
+// takes precedence over this typedef -- confirm which is intended.
+typedef std::function<uint64_t(void)> MaliValueGetter;
+
+// Constructs the profiler: stores the requested counter set, runs init()
+// (which throws on failure), then selects the table that maps each GpuCounter
+// to a getter reading the matching hardware counter(s) for the detected GPU.
+//
+// Three candidate tables are built on every construction (valhall, bifrost,
+// midgard); exactly one is copied into mappings_, with per-product overrides
+// applied afterwards.
+MaliProfiler::MaliProfiler(const GpuCounterSet &enabled_counters) :
+    enabled_counters_(enabled_counters)
+{
+	// Throws if setup fails
+	init();
+
+	// Table used for the Valhall-family products and as the switch default.
+	const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> valhall_mappings = {
+	    {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
+	    {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
+	    {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
+	    {GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
+
+	    {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
+	    {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
+	    {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
+
+	    {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
+	    {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
+	    {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
+	    {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
+	    {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
+	    {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
+
+	    {GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU") + get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_MSG"); }},
+	    {GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
+
+	    {GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
+	    // The three units run in parallel so we can approximate cycles by taking the largest value. SFU instructions use 4 cycles per warp.
+	    {GpuCounter::ShaderArithmeticCycles, [this] { return std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_FMA"), std::max(get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_CVT"), 4 * get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_SFU"))); }},
+	    {GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
+	    {GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
+
+	    {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
+	    {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
+	    {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
+	    {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
+	    {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
+	    {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
+	    {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
+	    {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
+	};
+
+	// Table used for the Bifrost-family products; differs from the Valhall one
+	// in how Instructions and ShaderArithmeticCycles are derived.
+	const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> bifrost_mappings = {
+	    {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
+	    {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
+	    {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
+	    {GpuCounter::TilerCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_TILER, "TILER_ACTIVE"); }},
+
+	    {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
+	    {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
+	    {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
+
+	    {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
+	    {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
+	    {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
+	    {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILL"); }},
+	    {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_TEST"); }},
+	    {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_LZS_KILL"); }},
+
+	    {GpuCounter::Instructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
+	    {GpuCounter::DivergedInstructions, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_DIVERGED"); }},
+
+	    {GpuCounter::ShaderCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_CORE_ACTIVE"); }},
+	    {GpuCounter::ShaderArithmeticCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "EXEC_INSTR_COUNT"); }},
+	    {GpuCounter::ShaderLoadStoreCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_FULL") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_READ_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_WRITE_SHORT") + get_counter_value(MALI_NAME_BLOCK_SHADER, "LS_MEM_ATOMIC"); }},
+	    {GpuCounter::ShaderTextureCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_FILT_NUM_OPERATIONS"); }},
+
+	    {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
+	    {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
+	    {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
+	    {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
+	    {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
+	    {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
+	    {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
+	    {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
+	};
+
+	// Table used for the Midgard-family products; it has no entries for
+	// TilerCycles, the instruction counters or the shader cycle counters.
+	const std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> midgard_mappings = {
+	    {GpuCounter::GpuCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "GPU_ACTIVE"); }},
+	    {GpuCounter::VertexComputeCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_ACTIVE"); }},
+	    {GpuCounter::FragmentCycles, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_ACTIVE"); }},
+
+	    {GpuCounter::VertexComputeJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS1_JOBS"); }},
+	    {GpuCounter::FragmentJobs, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_JOBS"); }},
+	    {GpuCounter::Pixels, [this] { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 1024; }},
+
+	    {GpuCounter::Tiles, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_PTILES"); }},
+	    {GpuCounter::TransactionEliminations, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_TRANS_ELIM"); }},
+	    {GpuCounter::EarlyZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_TEST"); }},
+	    {GpuCounter::EarlyZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_QUADS_EZS_KILLED"); }},
+	    {GpuCounter::LateZTests, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_TEST"); }},
+	    {GpuCounter::LateZKilled, [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "FRAG_THREADS_LZS_KILLED"); }},
+
+	    {GpuCounter::CacheReadLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_READ_LOOKUP"); }},
+	    {GpuCounter::CacheWriteLookups, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_WRITE_LOOKUP"); }},
+	    {GpuCounter::ExternalMemoryReadAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ"); }},
+	    {GpuCounter::ExternalMemoryWriteAccesses, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE"); }},
+	    {GpuCounter::ExternalMemoryReadStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_AR_STALL"); }},
+	    {GpuCounter::ExternalMemoryWriteStalls, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_W_STALL"); }},
+	    {GpuCounter::ExternalMemoryReadBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_READ_BEATS") * 16; }},
+	    {GpuCounter::ExternalMemoryWriteBytes, [this] { return get_counter_value(MALI_NAME_BLOCK_MMU, "L2_EXT_WRITE_BEATS") * 16; }},
+	};
+
+	// Find the first product entry whose masked id matches the GPU id stored by init().
+	auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) {
+		return (cm.product_mask & gpu_id_) == cm.product_id;
+	});
+
+	if (product != std::end(mali_userspace::products))
+	{
+		switch (product->product_id)
+		{
+			// Midgard table, with Pixels scaled by 256 instead of 1024.
+			case mali_userspace::PRODUCT_ID_T60X:
+			case mali_userspace::PRODUCT_ID_T62X:
+			case mali_userspace::PRODUCT_ID_T72X:
+				mappings_                     = midgard_mappings;
+				mappings_[GpuCounter::Pixels] = [this]() { return get_counter_value(MALI_NAME_BLOCK_JM, "JS0_TASKS") * 256; };
+				break;
+			case mali_userspace::PRODUCT_ID_T76X:
+			case mali_userspace::PRODUCT_ID_T82X:
+			case mali_userspace::PRODUCT_ID_T83X:
+			case mali_userspace::PRODUCT_ID_T86X:
+			case mali_userspace::PRODUCT_ID_TFRX:
+				mappings_ = midgard_mappings;
+				break;
+			// Bifrost table, with ShaderTextureCycles read from TEX_COORD_ISSUE.
+			case mali_userspace::PRODUCT_ID_TMIX:
+			case mali_userspace::PRODUCT_ID_THEX:
+				mappings_                                  = bifrost_mappings;
+				mappings_[GpuCounter::ShaderTextureCycles] = [this] { return get_counter_value(MALI_NAME_BLOCK_SHADER, "TEX_COORD_ISSUE"); };
+				break;
+			case mali_userspace::PRODUCT_ID_TSIX:
+			case mali_userspace::PRODUCT_ID_TNOX:
+			case mali_userspace::PRODUCT_ID_TGOX:
+			case mali_userspace::PRODUCT_ID_TDVX:
+				mappings_ = bifrost_mappings;
+				break;
+			// Any product id not listed above also falls back to the Valhall table.
+			case mali_userspace::PRODUCT_ID_TNAXa:
+			case mali_userspace::PRODUCT_ID_TNAXb:
+			case mali_userspace::PRODUCT_ID_TTRX:
+			case mali_userspace::PRODUCT_ID_TOTX:
+			case mali_userspace::PRODUCT_ID_TBOX:
+			default:
+				mappings_ = valhall_mappings;
+				break;
+		}
+	}
+	else
+	{
+		// NOTE(review): init() performs the same product lookup and throws when
+		// nothing matches, so this branch is presumably not reachable -- confirm.
+		HWCPIPE_LOG("Mali counters initialization failed: Failed to identify GPU");
+	}
+}
+
+// Opens the Mali device, sets up the hardware counter reader, maps the sample
+// buffers, selects the counter name table for the detected product, and builds
+// the shader-core index remap table.
+//
+// Throws std::runtime_error on any failure. init() is called from the
+// constructor, so when it throws the destructor will not run; everything
+// acquired so far (the mapping and both descriptors) is therefore released
+// here before the exception is rethrown.
+void MaliProfiler::init()
+{
+	MaliHWInfo hw_info = get_mali_hw_info(device_);
+
+	num_cores_     = hw_info.mp_count;
+	num_l2_slices_ = hw_info.l2_slices;
+	gpu_id_        = hw_info.gpu_id;
+
+	try
+	{
+		fd_ = open(device_, O_RDWR | O_CLOEXEC | O_NONBLOCK);        // NOLINT
+
+		if (fd_ < 0)
+		{
+			throw std::runtime_error("Failed to open /dev/mali0.");
+		}
+
+		// Version handshake: mali_ioctl() first, KBASE_IOCTL_VERSION_CHECK as fallback.
+		{
+			mali_userspace::kbase_uk_hwcnt_reader_version_check_args check;        // NOLINT
+			memset(&check, 0, sizeof(check));
+
+			if (mali_userspace::mali_ioctl(fd_, check) != 0)
+			{
+				mali_userspace::kbase_ioctl_version_check _check = {0, 0};
+				if (ioctl(fd_, KBASE_IOCTL_VERSION_CHECK, &_check) < 0)
+				{
+					throw std::runtime_error("Failed to get ABI version.");
+				}
+			}
+			else if (check.major < 10)
+			{
+				throw std::runtime_error("Unsupported ABI version 10.");
+			}
+		}
+
+		// Context flags: mali_ioctl() first, KBASE_IOCTL_SET_FLAGS as fallback.
+		{
+			mali_userspace::kbase_uk_hwcnt_reader_set_flags flags;        // NOLINT
+			memset(&flags, 0, sizeof(flags));
+			flags.header.id    = mali_userspace::KBASE_FUNC_SET_FLAGS;        // NOLINT
+			flags.create_flags = mali_userspace::BASE_CONTEXT_CREATE_KERNEL_FLAGS;
+
+			if (mali_userspace::mali_ioctl(fd_, flags) != 0)
+			{
+				mali_userspace::kbase_ioctl_set_flags _flags = {1u << 1};
+				if (ioctl(fd_, KBASE_IOCTL_SET_FLAGS, &_flags) < 0)
+				{
+					throw std::runtime_error("Failed settings flags ioctl.");
+				}
+			}
+		}
+
+		// Counter reader setup with every bitmask bit set. The reader
+		// descriptor comes back in setup.fd on the first path and as the
+		// ioctl return value on the fallback path.
+		{
+			mali_userspace::kbase_uk_hwcnt_reader_setup setup;        // NOLINT
+			memset(&setup, 0, sizeof(setup));
+			setup.header.id    = mali_userspace::KBASE_FUNC_HWCNT_READER_SETUP;        // NOLINT
+			setup.buffer_count = buffer_count_;
+			setup.jm_bm        = -1;
+			setup.shader_bm    = -1;
+			setup.tiler_bm     = -1;
+			setup.mmu_l2_bm    = -1;
+			setup.fd           = -1;
+
+			if (mali_userspace::mali_ioctl(fd_, setup) != 0)
+			{
+				mali_userspace::kbase_ioctl_hwcnt_reader_setup _setup = {};
+				_setup.buffer_count                                   = buffer_count_;
+				_setup.jm_bm                                          = -1;
+				_setup.shader_bm                                      = -1;
+				_setup.tiler_bm                                       = -1;
+				_setup.mmu_l2_bm                                      = -1;
+
+				int ret;
+				if ((ret = ioctl(fd_, KBASE_IOCTL_HWCNT_READER_SETUP, &_setup)) < 0)
+				{
+					throw std::runtime_error("Failed setting hwcnt reader ioctl.");
+				}
+				hwc_fd_ = ret;
+			}
+			else
+			{
+				hwc_fd_ = setup.fd;
+			}
+		}
+
+		{
+			// Start from a value that cannot equal HWCNT_READER_API so that an
+			// ioctl that leaves it untouched is still detected as a mismatch.
+			uint32_t api_version = ~mali_userspace::HWCNT_READER_API;
+
+			if (ioctl(hwc_fd_, mali_userspace::KBASE_HWCNT_READER_GET_API_VERSION, &api_version) != 0)        // NOLINT
+			{
+				throw std::runtime_error("Could not determine hwcnt reader API.");
+			}
+			else if (api_version != mali_userspace::HWCNT_READER_API)
+			{
+				throw std::runtime_error("Invalid API version.");
+			}
+		}
+
+		if (ioctl(hwc_fd_, static_cast<int>(mali_userspace::KBASE_HWCNT_READER_GET_BUFFER_SIZE), &buffer_size_) != 0)        // NOLINT
+		{
+			throw std::runtime_error("Failed to get buffer size.");
+		}
+
+		if (ioctl(hwc_fd_, static_cast<int>(mali_userspace::KBASE_HWCNT_READER_GET_HWVER), &hw_ver_) != 0)        // NOLINT
+		{
+			throw std::runtime_error("Could not determine HW version.");
+		}
+
+		if (hw_ver_ < 5)
+		{
+			throw std::runtime_error("Unsupported HW version.");
+		}
+
+		sample_data_ = static_cast<uint8_t *>(mmap(nullptr, buffer_count_ * buffer_size_, PROT_READ, MAP_PRIVATE, hwc_fd_, 0));
+
+		if (sample_data_ == MAP_FAILED)        // NOLINT
+		{
+			throw std::runtime_error("Failed to map sample data.");
+		}
+
+		// Pick the counter name table of the first product whose masked id matches.
+		auto product = std::find_if(std::begin(mali_userspace::products), std::end(mali_userspace::products), [&](const mali_userspace::CounterMapping &cm) {
+			return (cm.product_mask & hw_info.gpu_id) == cm.product_id;
+		});
+
+		if (product != std::end(mali_userspace::products))
+		{
+			names_lut_ = product->names_lut;
+		}
+		else
+		{
+			throw std::runtime_error("Could not identify GPU.");
+		}
+
+		raw_counter_buffer_.resize(buffer_size_ / sizeof(uint32_t));
+
+		// Build core remap table: entry k is the bit position of the k-th set
+		// bit of the core mask.
+		core_index_remap_.clear();
+		core_index_remap_.reserve(hw_info.mp_count);
+
+		unsigned int mask = hw_info.core_mask;
+
+		while (mask != 0)
+		{
+			unsigned int bit = __builtin_ctz(mask);
+			core_index_remap_.push_back(bit);
+			mask &= ~(1u << bit);
+		}
+	}
+	catch (...)
+	{
+		// Undo partial setup so a failed construction does not leak the
+		// mapping or the descriptors, then let the original error propagate.
+		if (sample_data_ != nullptr && sample_data_ != MAP_FAILED)        // NOLINT
+		{
+			::munmap(sample_data_, buffer_count_ * buffer_size_);
+		}
+		sample_data_ = nullptr;
+
+		if (hwc_fd_ >= 0)
+		{
+			::close(hwc_fd_);
+			hwc_fd_ = -1;
+		}
+
+		if (fd_ >= 0)
+		{
+			::close(fd_);
+			fd_ = -1;
+		}
+
+		throw;
+	}
+}
+
+// Requests a counter dump and waits for it, without converting the result into
+// measurements (compare sample(), which performs the same two calls first).
+void MaliProfiler::run()
+{
+	sample_counters();
+	wait_next_event();
+}
+
+// Intentionally empty: no action is taken when profiling stops.
+void MaliProfiler::stop()
+{
+	// We don't need to do anything on stop()
+}
+
+// Triggers a counter dump, waits for it, and refreshes measurements_ for every
+// enabled counter that has a getter in mappings_. Enabled counters without a
+// getter are skipped. Returns a reference to the internal measurements_.
+const GpuMeasurements &MaliProfiler::sample()
+{
+	sample_counters();
+	wait_next_event();
+
+	for (const auto &enabled : enabled_counters_)
+	{
+		const auto entry = mappings_.find(enabled);
+		if (entry != mappings_.end())
+		{
+			measurements_[entry->first] = entry->second();
+		}
+	}
+
+	return measurements_;
+}
+
+// Issues the KBASE_HWCNT_READER_DUMP request on the counter reader descriptor.
+// Throws std::runtime_error if the request does not return zero.
+void MaliProfiler::sample_counters()
+{
+	const int status = ioctl(hwc_fd_, mali_userspace::KBASE_HWCNT_READER_DUMP, 0);
+	if (status == 0)
+	{
+		return;
+	}
+
+	throw std::runtime_error("Could not sample hardware counters.");
+}
+
+// Blocks (no timeout) until the counter reader descriptor reports an event.
+// When a sample is readable, the indicated buffer is copied into
+// raw_counter_buffer_, its timestamp is recorded, and the buffer is handed
+// back to the reader. Throws std::runtime_error if poll() or either buffer
+// request fails, or if the descriptor reports a hang-up without data.
+void MaliProfiler::wait_next_event()
+{
+	pollfd hwc_poll;        // NOLINT
+	hwc_poll.fd     = hwc_fd_;
+	hwc_poll.events = POLLIN;
+
+	if (poll(&hwc_poll, 1, -1) < 0)
+	{
+		throw std::runtime_error("poll() failed.");
+	}
+
+	const bool has_sample = (hwc_poll.revents & POLLIN) != 0;
+	if (!has_sample)
+	{
+		// Nothing to read: only a hang-up is treated as an error.
+		if ((hwc_poll.revents & POLLHUP) != 0)
+		{
+			throw std::runtime_error("HWC hung up.");
+		}
+		return;
+	}
+
+	mali_userspace::kbase_hwcnt_reader_metadata buffer_meta;        // NOLINT
+
+	if (ioctl(hwc_fd_, static_cast<int>(mali_userspace::KBASE_HWCNT_READER_GET_BUFFER), &buffer_meta) != 0)        // NOLINT
+	{
+		throw std::runtime_error("Failed READER_GET_BUFFER.");
+	}
+
+	// Copy the sample out of the mapped region before returning the buffer.
+	memcpy(raw_counter_buffer_.data(), sample_data_ + buffer_size_ * buffer_meta.buffer_idx, buffer_size_);
+	timestamp_ = buffer_meta.timestamp;
+
+	if (ioctl(hwc_fd_, mali_userspace::KBASE_HWCNT_READER_PUT_BUFFER, &buffer_meta) != 0)        // NOLINT
+	{
+		throw std::runtime_error("Failed READER_PUT_BUFFER.");
+	}
+}
+
+// Returns the current value of the counter called `name` in `block`, read from
+// the most recently copied sample.
+//
+// MMU counters are summed over all L2 slices and shader counters over all
+// shader cores; other blocks are read from their single instance.
+//
+// Returns 0 when no counter in the block matches `name`; previously the -1
+// "not found" result was used directly as an array index.
+uint64_t MaliProfiler::get_counter_value(mali_userspace::MaliCounterBlockName block, const char *name) const
+{
+	// The name lookup does not depend on the slice/core, so resolve it once.
+	const int counter_index = find_counter_index_by_name(block, name);
+	if (counter_index < 0)
+	{
+		return 0;
+	}
+
+	uint64_t sum = 0;
+	switch (block)
+	{
+		case mali_userspace::MALI_NAME_BLOCK_MMU:
+			// If an MMU counter is selected, sum the values over MMU slices
+			for (int i = 0; i < num_l2_slices_; i++)
+			{
+				sum += get_counters(block, i)[counter_index];
+			}
+			return sum;
+
+		case mali_userspace::MALI_NAME_BLOCK_SHADER:
+			// If a shader core counter is selected, sum the values over shader cores
+			for (int i = 0; i < num_cores_; i++)
+			{
+				sum += get_counters(block, i)[counter_index];
+			}
+			return sum;
+
+		case mali_userspace::MALI_NAME_BLOCK_JM:
+		case mali_userspace::MALI_NAME_BLOCK_TILER:
+		default:
+			return static_cast<uint64_t>(get_counters(block)[counter_index]);
+	}
+}
+
+// Returns a pointer to the first counter of the requested block inside
+// raw_counter_buffer_. Block order in the buffer: JM, tiler, one block per L2
+// slice, then one block per shader core (addressed through core_index_remap_).
+// `index` selects the slice for MMU and the core for every other block that is
+// not JM or tiler; it is range-checked and std::runtime_error is thrown when
+// out of range. JM and tiler ignore `index`.
+const uint32_t *MaliProfiler::get_counters(mali_userspace::MaliCounterBlockName block, int index) const
+{
+	if (block == mali_userspace::MALI_NAME_BLOCK_JM)
+	{
+		return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 0;
+	}
+
+	if (block == mali_userspace::MALI_NAME_BLOCK_TILER)
+	{
+		return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * 1;
+	}
+
+	if (block == mali_userspace::MALI_NAME_BLOCK_MMU)
+	{
+		const bool slice_in_range = index >= 0 && index < num_l2_slices_;
+		if (!slice_in_range)
+		{
+			throw std::runtime_error("Invalid slice number.");
+		}
+
+		// MMU blocks start right after the JM and tiler blocks.
+		return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * (2 + index);
+	}
+
+	// Everything else is treated as a shader core block.
+	const bool core_in_range = index >= 0 && index < num_cores_;
+	if (!core_in_range)
+	{
+		throw std::runtime_error("Invalid core number.");
+	}
+
+	// Shader core blocks follow the MMU blocks; translate the logical core
+	// index through the remap table.
+	return raw_counter_buffer_.data() + mali_userspace::MALI_NAME_BLOCK_SIZE * (2 + num_l2_slices_ + core_index_remap_[index]);
+}
+
+// Searches the name table section belonging to `block` for the first entry
+// that contains `name` as a substring. Returns its position within the block,
+// or -1 when no entry matches.
+int MaliProfiler::find_counter_index_by_name(mali_userspace::MaliCounterBlockName block, const char *name) const
+{
+	const char *const *const first = &names_lut_[mali_userspace::MALI_NAME_BLOCK_SIZE * block];
+	const char *const *const last  = first + mali_userspace::MALI_NAME_BLOCK_SIZE;
+
+	const char *const *const hit = std::find_if(first, last, [name](const char *candidate) {
+		return strstr(candidate, name) != nullptr;
+	});
+
+	return hit == last ? -1 : static_cast<int>(hit - first);
+}
+
+}        // namespace hwcpipe

+ 124 - 0
ThirdParty/HwcPipe/vendor/arm/mali/mali_profiler.h

@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include "gpu_profiler.h"
+
+#include "hwc.hpp"
+
+#include <functional>
+#include <vector>
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+namespace hwcpipe
+{
+/**
+ * A Gpu profiler that uses Mali counter data.
+ *
+ * The constructor opens the Mali device and sets up the hardware counter
+ * reader (throwing on failure). The instance owns two file descriptors and a
+ * memory mapping of the sample buffers; they are released by the destructor,
+ * and the class is non-copyable so they cannot be released twice.
+ */
+class MaliProfiler : public GpuProfiler
+{
+  public:
+	explicit MaliProfiler(const GpuCounterSet &enabled_counters);
+
+	/** Unmaps the sample buffers and closes the descriptors opened by init(). */
+	virtual ~MaliProfiler()
+	{
+		if (sample_data_ != nullptr && sample_data_ != MAP_FAILED)        // NOLINT
+		{
+			::munmap(sample_data_, buffer_count_ * buffer_size_);
+		}
+
+		if (hwc_fd_ >= 0)
+		{
+			::close(hwc_fd_);
+		}
+
+		if (fd_ >= 0)
+		{
+			::close(fd_);
+		}
+	}
+
+	// Copying would duplicate ownership of the descriptors and the mapping.
+	MaliProfiler(const MaliProfiler &) = delete;
+	MaliProfiler &operator=(const MaliProfiler &) = delete;
+
+	/** Counters that sample() will try to report. */
+	virtual const GpuCounterSet &enabled_counters() const override
+	{
+		return enabled_counters_;
+	}
+
+	/** The fixed set of counters this profiler declares as supported. */
+	virtual const GpuCounterSet &supported_counters() const override
+	{
+		return supported_counters_;
+	}
+
+	/** Replaces the set of counters reported by sample(). */
+	virtual void set_enabled_counters(GpuCounterSet counters) override
+	{
+		enabled_counters_ = std::move(counters);
+	}
+
+	virtual void                   run() override;
+	virtual const GpuMeasurements &sample() override;
+	virtual void                   stop() override;
+
+  private:
+	GpuCounterSet enabled_counters_{};
+
+	const GpuCounterSet supported_counters_{
+	    GpuCounter::GpuCycles,
+	    GpuCounter::VertexComputeCycles,
+	    GpuCounter::FragmentCycles,
+	    GpuCounter::TilerCycles,
+	    GpuCounter::VertexComputeJobs,
+	    GpuCounter::Tiles,
+	    GpuCounter::TransactionEliminations,
+	    GpuCounter::FragmentJobs,
+	    GpuCounter::Pixels,
+	    GpuCounter::EarlyZTests,
+	    GpuCounter::EarlyZKilled,
+	    GpuCounter::LateZTests,
+	    GpuCounter::LateZKilled,
+	    GpuCounter::Instructions,
+	    GpuCounter::DivergedInstructions,
+	    GpuCounter::ShaderCycles,
+	    GpuCounter::ShaderArithmeticCycles,
+	    GpuCounter::ShaderLoadStoreCycles,
+	    GpuCounter::ShaderTextureCycles,
+	    GpuCounter::CacheReadLookups,
+	    GpuCounter::CacheWriteLookups,
+	    GpuCounter::ExternalMemoryReadAccesses,
+	    GpuCounter::ExternalMemoryWriteAccesses,
+	    GpuCounter::ExternalMemoryReadStalls,
+	    GpuCounter::ExternalMemoryWriteStalls,
+	    GpuCounter::ExternalMemoryReadBytes,
+	    GpuCounter::ExternalMemoryWriteBytes,
+	};
+
+	// Getter producing the value of one GpuCounter from the latest sample.
+	// NOTE(review): the getters installed by the constructor compute uint64_t
+	// values, which this signature converts to double -- confirm intended.
+	typedef std::function<double(void)>                             MaliValueGetter;
+	std::unordered_map<GpuCounter, MaliValueGetter, GpuCounterHash> mappings_{};
+
+	const char *const  device_{"/dev/mali0"};        // device node opened by init()
+	int                num_cores_{0};                // shader core count from the HW info
+	int                num_l2_slices_{0};            // L2 slice count from the HW info
+	int                gpu_id_{0};                   // product id from the HW info
+	uint32_t           hw_ver_{0};                   // value returned by the reader's GET_HWVER request
+	int                buffer_count_{16};            // number of sample buffers requested from the reader
+	size_t             buffer_size_{0};              // size in bytes of one sample buffer
+	uint8_t *          sample_data_{nullptr};        // mapping of buffer_count_ * buffer_size_ bytes
+	uint64_t           timestamp_{0};                // timestamp of the most recently copied sample
+	const char *const *names_lut_{
+	    nullptr};        // counter name table of the detected product
+	std::vector<uint32_t>     raw_counter_buffer_{};        // copy of the most recent sample
+	std::vector<unsigned int> core_index_remap_{};          // logical core index -> bit position in the core mask
+	int                       fd_{-1};                      // descriptor of device_
+	int                       hwc_fd_{-1};                  // descriptor of the counter reader
+
+	GpuMeasurements measurements_{};
+
+	void            init();
+	void            sample_counters();
+	void            wait_next_event();
+	const uint32_t *get_counters(mali_userspace::MaliCounterBlockName block, int index = 0) const;
+	uint64_t        get_counter_value(mali_userspace::MaliCounterBlockName block, const char *name) const;
+	int             find_counter_index_by_name(mali_userspace::MaliCounterBlockName block, const char *name) const;
+};
+
+}        // namespace hwcpipe

+ 201 - 0
ThirdParty/HwcPipe/vendor/arm/pmu/pmu_counter.cpp

@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pmu_counter.h"
+
+#include <asm/unistd.h>
+#include <cstring>
+#include <stdexcept>
+#include <sys/ioctl.h>
+#include <linux/version.h>
+
+/** Build an unopened counter; only the perf attributes shared by every event are filled in. */
+PmuCounter::PmuCounter() :
+    _perf_config()
+{
+	// Save event counts on context switch for inherited tasks
+	_perf_config.inherit_stat = 1;
+	// Count events of child tasks in addition to the task being measured
+	_perf_config.inherit = 1;
+	// The counter is created disabled; open() enables it explicitly
+	_perf_config.disabled = 1;
+
+	_perf_config.size = sizeof(perf_event_attr);
+}
+
+/** Build a counter and immediately open it for the event described by @p config. */
+PmuCounter::PmuCounter(PmuEventInfo config) :
+    PmuCounter()
+{
+	// The delegated default constructor has already set the common attributes
+	this->open(config);
+}
+
+/** Release the underlying descriptor if one is still open. */
+PmuCounter::~PmuCounter()
+{
+	this->close();
+}
+
+/** Store the event type and id from @p config, then open the counter with the stored attributes. */
+void PmuCounter::open(PmuEventInfo config)
+{
+	_perf_config.type   = config.type;
+	_perf_config.config = config.event;
+
+	this->open(_perf_config);
+}
+
+/** Open and enable a counter described by @p perf_config.
+ *
+ * Any counter this object already holds is closed first so its descriptor is
+ * not leaked. If enabling fails, the freshly opened descriptor is closed again
+ * before the exception is thrown.
+ *
+ * @param[in] perf_config Attributes passed to perf_event_open.
+ * @throws std::runtime_error if perf_event_open or the enable ioctl fails.
+ */
+void PmuCounter::open(const perf_event_attr &perf_config)
+{
+	// Re-opening must not leak the descriptor of a previously opened counter
+	close();
+
+	// Measure this process/thread (+ children) on any CPU
+	_fd = syscall(__NR_perf_event_open, &perf_config, 0, -1, -1, 0);
+
+	if (_fd < 0)
+	{
+		_fd = -1;
+		// Report the configuration that was actually requested, not the stored one
+		throw std::runtime_error("perf_event_open failed. Counter ID: " + config_to_str(perf_config));
+	}
+
+	const int result = ioctl(_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (result == -1)
+	{
+		// Capture errno before close() gets a chance to overwrite it
+		const int enable_errno = errno;
+		close();
+		throw std::runtime_error("Failed to enable PMU counter: " + std::string(std::strerror(enable_errno)));
+	}
+}
+
+/** Close the descriptor if one is open and mark the counter as closed; otherwise do nothing. */
+void PmuCounter::close()
+{
+	if (_fd == -1)
+	{
+		// Nothing is open
+		return;
+	}
+
+	::close(_fd);
+	_fd = -1;
+}
+
+/** Issue the perf reset ioctl on the counter.
+ *
+ * @return true when the ioctl succeeds.
+ * @throws std::runtime_error if the ioctl reports failure.
+ */
+bool PmuCounter::reset()
+{
+	if (ioctl(_fd, PERF_EVENT_IOC_RESET, 0) == -1)
+	{
+		throw std::runtime_error("Failed to reset PMU counter: " + std::string(std::strerror(errno)));
+	}
+
+	// Reaching this point means the ioctl did not report failure
+	return true;
+}
+
+namespace
+{
+/** Name of a PERF_TYPE_HARDWARE event id, or a fixed placeholder for ids not listed here. */
+const char *hardware_event_name(uint64_t event)
+{
+	switch (event)
+	{
+		case PERF_COUNT_HW_CPU_CYCLES:
+			return "PERF_COUNT_HW_CPU_CYCLES";
+		case PERF_COUNT_HW_INSTRUCTIONS:
+			return "PERF_COUNT_HW_INSTRUCTIONS";
+		case PERF_COUNT_HW_CACHE_REFERENCES:
+			return "PERF_COUNT_HW_CACHE_REFERENCES";
+		case PERF_COUNT_HW_CACHE_MISSES:
+			return "PERF_COUNT_HW_CACHE_MISSES";
+		case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+			return "PERF_COUNT_HW_BRANCH_INSTRUCTIONS";
+		case PERF_COUNT_HW_BRANCH_MISSES:
+			return "PERF_COUNT_HW_BRANCH_MISSES";
+		case PERF_COUNT_HW_BUS_CYCLES:
+			return "PERF_COUNT_HW_BUS_CYCLES";
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0)
+		case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
+			return "PERF_COUNT_HW_STALLED_CYCLES_FRONTEND";
+		case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
+			return "PERF_COUNT_HW_STALLED_CYCLES_BACKEND";
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)
+		case PERF_COUNT_HW_REF_CPU_CYCLES:
+			return "PERF_COUNT_HW_REF_CPU_CYCLES";
+#endif
+		default:
+			return "UNKNOWN HARDWARE COUNTER";
+	}
+}
+
+/** Name of a PERF_TYPE_SOFTWARE event id, or a fixed placeholder for ids not listed here. */
+const char *software_event_name(uint64_t event)
+{
+	switch (event)
+	{
+		case PERF_COUNT_SW_CPU_CLOCK:
+			return "PERF_COUNT_SW_CPU_CLOCK";
+		case PERF_COUNT_SW_TASK_CLOCK:
+			return "PERF_COUNT_SW_TASK_CLOCK";
+		case PERF_COUNT_SW_PAGE_FAULTS:
+			return "PERF_COUNT_SW_PAGE_FAULTS";
+		case PERF_COUNT_SW_CONTEXT_SWITCHES:
+			return "PERF_COUNT_SW_CONTEXT_SWITCHES";
+		case PERF_COUNT_SW_CPU_MIGRATIONS:
+			return "PERF_COUNT_SW_CPU_MIGRATIONS";
+		case PERF_COUNT_SW_PAGE_FAULTS_MIN:
+			return "PERF_COUNT_SW_PAGE_FAULTS_MIN";
+		case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
+			return "PERF_COUNT_SW_PAGE_FAULTS_MAJ";
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,33)
+		case PERF_COUNT_SW_ALIGNMENT_FAULTS:
+			return "PERF_COUNT_SW_ALIGNMENT_FAULTS";
+		case PERF_COUNT_SW_EMULATION_FAULTS:
+			return "PERF_COUNT_SW_EMULATION_FAULTS";
+#endif
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,0)
+		case PERF_COUNT_SW_DUMMY:
+			return "PERF_COUNT_SW_DUMMY";
+#endif
+		default:
+			return "UNKNOWN SOFTWARE COUNTER";
+	}
+}
+
+/** Name of a PERF_TYPE_RAW event id listed in PmuImplDefined, or a fixed placeholder otherwise. */
+const char *raw_event_name(uint64_t event)
+{
+	switch (static_cast<PmuImplDefined>(event))
+	{
+		case PmuImplDefined::L1_ACCESSES:
+			return "L1_ACCESSES";
+		case PmuImplDefined::INSTR_RETIRED:
+			return "INSTR_RETIRED";
+		case PmuImplDefined::L2_ACCESSES:
+			return "L2_ACCESSES";
+		case PmuImplDefined::L3_ACCESSES:
+			return "L3_ACCESSES";
+		case PmuImplDefined::BUS_READS:
+			return "BUS_READS";
+		case PmuImplDefined::BUS_WRITES:
+			return "BUS_WRITES";
+		case PmuImplDefined::MEM_READS:
+			return "MEM_READS";
+		case PmuImplDefined::MEM_WRITES:
+			return "MEM_WRITES";
+		case PmuImplDefined::ASE_SPEC:
+			return "ASE_SPEC";
+		case PmuImplDefined::VFP_SPEC:
+			return "VFP_SPEC";
+		case PmuImplDefined::CRYPTO_SPEC:
+			return "CRYPTO_SPEC";
+		default:
+			return "UNKNOWN RAW COUNTER";
+	}
+}
+}        // namespace
+
+/** Produce a printable identifier for the event selected by @p perf_config.
+ *
+ * Hardware, software and raw event types are translated to a symbolic name;
+ * for any other event type the numeric config value is returned as text.
+ */
+std::string PmuCounter::config_to_str(const perf_event_attr &perf_config)
+{
+	switch (perf_config.type)
+	{
+		case PERF_TYPE_HARDWARE:
+			return hardware_event_name(perf_config.config);
+
+		case PERF_TYPE_SOFTWARE:
+			return software_event_name(perf_config.config);
+
+		case PERF_TYPE_RAW:
+			return raw_event_name(perf_config.config);
+
+		default:
+			return std::to_string(perf_config.config);
+	}
+}

+ 133 - 0
ThirdParty/HwcPipe/vendor/arm/pmu/pmu_counter.h

@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <errno.h>
+#include <linux/perf_event.h>
+#include <stdexcept>
+#include <string>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "hwcpipe_log.h"
+
+/** Event ids used with PERF_TYPE_RAW counters.
+ *
+ * NOTE(review): the values look like ARM PMU event numbers; confirm against
+ * the ARM architecture documentation before adding new entries.
+ */
+enum class PmuImplDefined : uint64_t
+{
+	// Cache accesses
+	L1_ACCESSES = 0x04,
+	L2_ACCESSES = 0x16,
+	L3_ACCESSES = 0x2B,
+
+	// Retired instructions
+	INSTR_RETIRED = 0x08,
+
+	// Bus and memory traffic
+	BUS_READS  = 0x60,
+	BUS_WRITES = 0x61,
+	MEM_READS  = 0x66,
+	MEM_WRITES = 0x67,
+
+	// Speculatively executed instruction classes
+	ASE_SPEC    = 0x74,
+	VFP_SPEC    = 0x75,
+	CRYPTO_SPEC = 0x77,
+};
+
+/** Pairs a perf event type with the event id to count within that type. */
+struct PmuEventInfo
+{
+	uint64_t type;
+	uint64_t event;
+
+	/** Build from a numeric event id. */
+	PmuEventInfo(uint64_t type, uint64_t event) :
+		type{type},
+		event{event}
+	{
+	}
+
+	/** Build from a PmuImplDefined id; forwards its underlying value to the numeric overload. */
+	PmuEventInfo(uint64_t type, PmuImplDefined event) :
+		PmuEventInfo(type, static_cast<uint64_t>(event))
+	{
+	}
+};
+
+/** Class provides access to CPU hardware counters.
+ *
+ * An instance owns the descriptor returned by perf_event_open and closes it in
+ * its destructor. Copying is therefore disabled: two copies would both try to
+ * close the same descriptor.
+ */
+class PmuCounter
+{
+  public:
+	/** Default constructor. Prepares the perf configuration without opening a counter. */
+	PmuCounter();
+
+	/** Create PMU counter with specified config.
+	 *
+	 * This constructor fills in the default configuration and then calls
+	 * @ref open with @p config.
+	 *
+	 * @param[in] config Counter info.
+	 * @throws std::runtime_error if the counter cannot be opened or enabled.
+	 */
+	PmuCounter(PmuEventInfo config);
+
+	/** Not copyable: the descriptor has a single owner. */
+	PmuCounter(const PmuCounter &) = delete;
+
+	/** Not copy-assignable: the descriptor has a single owner. */
+	PmuCounter &operator=(const PmuCounter &) = delete;
+
+	/** Destructor. Closes the counter if it is still open. */
+	~PmuCounter();
+
+	/** Get the counter value.
+	 *
+	 * @return Counter value casted to the specified type.
+	 * @throws std::runtime_error if reading the counter fails.
+	 */
+	template <typename T>
+	T get_value() const;
+
+	/** Open the specified counter based on the given configuration.
+	 *
+	 * @param[in] config The configuration.
+	 * @throws std::runtime_error if the counter cannot be opened or enabled.
+	 */
+	void open(PmuEventInfo config);
+
+	/** Open the specified configuration.
+	 *
+	 * @param[in] perf_config The specified configuration.
+	 * @throws std::runtime_error if the counter cannot be opened or enabled.
+	 */
+	void open(const perf_event_attr &perf_config);
+
+	/** Close the currently open counter. Does nothing if no counter is open. */
+	void close();
+
+	/** Reset counter.
+	 *
+	 * @return true on success.
+	 * @throws std::runtime_error if the reset request fails.
+	 */
+	bool reset();
+
+	/** Return a printable name for the event selected by @p perf_config. */
+	std::string config_to_str(const perf_event_attr &perf_config);
+
+  private:
+	perf_event_attr _perf_config;
+	long            _fd{-1};        // -1 means no counter is open
+};
+
+/** Read the current counter value and convert it to @p T.
+ *
+ * @return The value read from the counter, cast to T.
+ * @throws std::runtime_error if the read fails or returns fewer bytes than a
+ *         full counter value, so a partial read is never reported as a sample.
+ */
+template <typename T>
+T PmuCounter::get_value() const
+{
+	long long     value{};
+	const ssize_t result = read(_fd, &value, sizeof(value));
+
+	if (result == -1)
+	{
+		throw std::runtime_error("Can't get PMU counter value: " + std::string(std::strerror(errno)));
+	}
+
+	// A zero-length or short read leaves `value` (partly) unset; errno is not meaningful here
+	if (static_cast<size_t>(result) != sizeof(value))
+	{
+		throw std::runtime_error("Can't get PMU counter value: incomplete read");
+	}
+
+	return static_cast<T>(value);
+}

+ 129 - 0
ThirdParty/HwcPipe/vendor/arm/pmu/pmu_profiler.cpp

@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pmu_profiler.h"
+
+#include "hwcpipe_log.h"
+
+namespace hwcpipe
+{
+/** Maps each CpuCounter to the perf event type and event id used to open it. */
+const std::unordered_map<CpuCounter, PmuEventInfo, CpuCounterHash> pmu_mappings{
+    // Counters backed by PERF_TYPE_HARDWARE events
+    {CpuCounter::Cycles, PmuEventInfo{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES}},
+    {CpuCounter::Instructions, PmuEventInfo{PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS}},
+    {CpuCounter::CacheReferences, PmuEventInfo{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES}},
+    {CpuCounter::CacheMisses, PmuEventInfo{PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES}},
+    {CpuCounter::BranchInstructions, PmuEventInfo{PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS}},
+    {CpuCounter::BranchMisses, PmuEventInfo{PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES}},
+
+    // Counters backed by PERF_TYPE_RAW events
+    {CpuCounter::L1Accesses, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::L1_ACCESSES}},
+    {CpuCounter::InstrRetired, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::INSTR_RETIRED}},
+    {CpuCounter::L2Accesses, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::L2_ACCESSES}},
+    {CpuCounter::L3Accesses, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::L3_ACCESSES}},
+    {CpuCounter::BusReads, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::BUS_READS}},
+    {CpuCounter::BusWrites, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::BUS_WRITES}},
+    {CpuCounter::MemReads, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::MEM_READS}},
+    {CpuCounter::MemWrites, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::MEM_WRITES}},
+    {CpuCounter::ASESpec, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::ASE_SPEC}},
+    {CpuCounter::VFPSpec, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::VFP_SPEC}},
+    {CpuCounter::CryptoSpec, PmuEventInfo{PERF_TYPE_RAW, PmuImplDefined::CRYPTO_SPEC}},
+};
+
+/** Open a PMU counter for every requested counter that has a perf mapping.
+ *
+ * Each counter is probed with one read. A counter that cannot be opened or
+ * read is logged and left out, so run() and sample() only ever touch counters
+ * that passed the probe.
+ *
+ * @param[in] enabled_counters Counters the caller wants to sample.
+ * @throws std::runtime_error if none of the requested counters is usable.
+ */
+PmuProfiler::PmuProfiler(const CpuCounterSet &enabled_counters) :
+    enabled_counters_(enabled_counters)
+{
+	// Set up PMU counters
+	for (const auto &counter : enabled_counters)
+	{
+		const auto pmu_config = pmu_mappings.find(counter);
+		if (pmu_config == pmu_mappings.end())
+		{
+			// No perf event is mapped to this counter
+			continue;
+		}
+
+		try
+		{
+			// Create a PMU counter with the specified configuration
+			auto pmu_counter_res = pmu_counters_.emplace(counter, pmu_config->second);
+
+			// Try reading a value from the counter to check that it opened correctly
+			auto &pmu_counter = pmu_counter_res.first->second;
+			pmu_counter.get_value<long long>();
+
+			// PMU counter is created and can retrieve values
+			available_counters_.insert(counter);
+		}
+		catch (const std::runtime_error &)
+		{
+			// PMU counter initialization failed. If the counter opened but the probe
+			// read failed it is already stored, so remove it again; when opening
+			// itself failed nothing was stored and the erase is a no-op.
+			pmu_counters_.erase(counter);
+			HWCPIPE_LOG("_");
+		}
+	}
+
+	if (available_counters_.size() == 0)
+	{
+		throw std::runtime_error("PMU counters not available.");
+	}
+}
+
+/** Reset every stored counter and clear the previous reading kept for it. */
+void PmuProfiler::run()
+{
+	for (auto &entry : pmu_counters_)
+	{
+		const auto &counter_id = entry.first;
+
+		entry.second.reset();
+		prev_measurements_[counter_id] = Value{};
+	}
+}
+
+/** Read every enabled counter that has a stored PMU counter and report the change since the last read.
+ *
+ * Counters whose read throws are logged and keep their previous measurement.
+ *
+ * @return The measurements map owned by this profiler.
+ */
+const CpuMeasurements &PmuProfiler::sample()
+{
+	for (const auto &enabled : enabled_counters_)
+	{
+		const auto entry = pmu_counters_.find(enabled);
+		if (entry != pmu_counters_.end())
+		{
+			const auto &counter_id = entry->first;
+
+			try
+			{
+				auto current = entry->second.get_value<long long>();
+
+				// The counters are not reset between samples (doing so seems to alter
+				// the data), so report the difference from the previous reading.
+				measurements_[counter_id]      = current - prev_measurements_[counter_id].get<long long>();
+				prev_measurements_[counter_id] = current;
+			}
+			catch (const std::runtime_error &e)
+			{
+				HWCPIPE_LOG("Failed to get value from PMU.");
+			}
+		}
+	}
+
+	return measurements_;
+}
+
+void PmuProfiler::stop()
+{
+	// Intentionally a no-op: nothing is released here, the counters are closed by ~PmuCounter()
+}
+
+}        // namespace hwcpipe

+ 90 - 0
ThirdParty/HwcPipe/vendor/arm/pmu/pmu_profiler.h

@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#pragma once
+
+#include "cpu_profiler.h"
+
+#include "pmu_counter.h"
+
+namespace hwcpipe
+{
+/** CpuProfiler implementation that takes its measurements from PMU counters. */
+class PmuProfiler : public CpuProfiler
+{
+  public:
+	explicit PmuProfiler(const CpuCounterSet &enabled_counters);
+	virtual ~PmuProfiler() = default;
+
+	/** Counters currently requested for sampling. */
+	const CpuCounterSet &enabled_counters() const override
+	{
+		return enabled_counters_;
+	}
+
+	/** The fixed list of counters this profiler knows about. */
+	const CpuCounterSet &supported_counters() const override
+	{
+		return supported_counters_;
+	}
+
+	/** Replace the set of counters requested for sampling. */
+	void set_enabled_counters(CpuCounterSet counters) override
+	{
+		enabled_counters_ = std::move(counters);
+	}
+
+	void                   run() override;
+	const CpuMeasurements &sample() override;
+	void                   stop() override;
+
+  private:
+	// Counters requested by the caller
+	CpuCounterSet enabled_counters_{};
+	// Counters that were opened and read successfully in the constructor
+	CpuCounterSet available_counters_{};
+
+	const CpuCounterSet supported_counters_{
+	    // Mapped to PERF_TYPE_HARDWARE events in pmu_profiler.cpp
+	    CpuCounter::Cycles,
+	    CpuCounter::Instructions,
+	    CpuCounter::CacheReferences,
+	    CpuCounter::CacheMisses,
+	    CpuCounter::BranchInstructions,
+	    CpuCounter::BranchMisses,
+
+	    // Mapped to PERF_TYPE_RAW events in pmu_profiler.cpp
+	    CpuCounter::L1Accesses,
+	    CpuCounter::InstrRetired,
+	    CpuCounter::L2Accesses,
+	    CpuCounter::L3Accesses,
+	    CpuCounter::BusReads,
+	    CpuCounter::BusWrites,
+	    CpuCounter::MemReads,
+	    CpuCounter::MemWrites,
+	    CpuCounter::ASESpec,
+	    CpuCounter::VFPSpec,
+	    CpuCounter::CryptoSpec,
+	};
+
+	// Latest differential readings and the raw readings they were computed from
+	CpuMeasurements measurements_{};
+	CpuMeasurements prev_measurements_{};
+
+	// One open PMU counter per CpuCounter
+	std::unordered_map<CpuCounter, PmuCounter, CpuCounterHash> pmu_counters_{};
+};
+
+}        // namespace hwcpipe