Improve shader dumping

Panagiotis Christopoulos Charitos, 4 months ago
commit 1ffb7783b7

+ 1 - 1
AnKi/Gr/Common.cpp

@@ -212,7 +212,7 @@ StringList ShaderReflection::toString() const
 
 	for(VertexAttributeSemantic attrib : EnumBitsIterable<VertexAttributeSemantic, VertexAttributeSemanticBit>(m_vertex.m_vertexAttributeMask))
 	{
-		list.pushBackSprintf("Vert attrib: %u", U32(attrib));
+		list.pushBackSprintf("Vert attrib: %s", g_vertexAttributeSemanticNames[attrib].cstr());
 	}
 
 	list.pushBackSprintf("Color RT mask: %u", m_pixel.m_colorRenderTargetWritemask.getData()[0]);

+ 7 - 0
AnKi/Gr/Common.h

@@ -568,6 +568,10 @@ enum class ShaderType : U16
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(ShaderType)
 
+inline Array<CString, U32(ShaderType::kCount)> g_shaderTypeNames = {{"Vertex", "Hull", "Domain", "Geometry", "Amplification", "Mesh", "Pixel",
+																	 "Compute", "RayGen", "AnyHit", "ClosestHit", "Miss", "Intersection", "Callable",
+																	 "WorkGraph"}};
+
 enum class ShaderTypeBit : U16
 {
 	kVertex = 1 << 0,
@@ -849,6 +853,9 @@ enum class VertexAttributeSemantic : U8
 };
 ANKI_ENUM_ALLOW_NUMERIC_OPERATIONS(VertexAttributeSemantic)
 
+inline Array<CString, U32(VertexAttributeSemantic::kCount)> g_vertexAttributeSemanticNames = {
+	{"Position", "Normal", "TexCoord", "Color", "Misc0", "Misc1", "Misc2", "Misc3"}};
+
 enum class VertexAttributeSemanticBit : U8
 {
 	kNone,
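
Both new tables use the same enum-indexed lookup that the Common.cpp hunk above switches to. A minimal standalone sketch of the pattern, with std::array and a trimmed enum standing in for AnKi's Array/CString and the full VertexAttributeSemantic:

#include <array>
#include <cstdio>

// Trimmed stand-in for AnKi's VertexAttributeSemantic (illustration only).
enum class VertexAttributeSemantic : unsigned char
{
	kPosition,
	kNormal,
	kTexCoord,
	kColor,
	kCount
};

// One printable name per enumerant, indexed by its numeric value, mirroring
// g_vertexAttributeSemanticNames above.
constexpr std::array<const char*, unsigned(VertexAttributeSemantic::kCount)> kSemanticNames = {"Position", "Normal", "TexCoord", "Color"};

int main()
{
	const VertexAttributeSemantic attrib = VertexAttributeSemantic::kTexCoord;
	// What the Common.cpp hunk now prints instead of "Vert attrib: 2"
	std::printf("Vert attrib: %s\n", kSemanticNames[unsigned(attrib)]);
	return 0;
}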

+ 2 - 1
AnKi/Renderer/Reflections.h

@@ -16,7 +16,8 @@ inline BoolCVar g_rtReflectionsCVar("R", "RtReflections", true, "Enable RT refle
 inline NumericCVar<F32> g_rtReflectionsMaxRayDistanceCVar("R", "RtReflectionsMaxRayDistance", 100.0f, 1.0f, 10000.0f,
 														  "Max RT reflections ray distance");
 inline NumericCVar<U32> g_ssrStepIncrementCVar("R", "SsrStepIncrement", 32, 1, 256, "The number of steps for each loop");
-inline NumericCVar<U32> g_ssrMaxIterationsCVar("R", "SsrMaxIterations", 64, 1, 256, "Max SSR raymarching loop iterations");
+inline NumericCVar<U32> g_ssrMaxIterationsCVar("R", "SsrMaxIterations", ANKI_PLATFORM_MOBILE ? 16 : 64, 1, 256,
+											   "Max SSR raymarching loop iterations");
 
 inline NumericCVar<F32> g_roughnessCutoffToGiEdge0("R", "RoughnessCutoffToGiEdge0", 0.7f, 0.0f, 1.0f,
 												   "Before this roughness the reflections will never sample the GI probes");

+ 8 - 5
AnKi/ShaderCompiler/RadeonGpuAnalyzer.h

@@ -17,11 +17,14 @@ namespace anki {
 class RgaOutput
 {
 public:
-	U32 m_vgprCount;
-	U32 m_sgprCount;
-	U32 m_isaSize;
-
-	void toString(String& str) const;
+	U32 m_vgprCount = 0;
+	U32 m_sgprCount = 0;
+	U32 m_isaSize = 0;
+
+	String toString() const
+	{
+		return String().sprintf("VGPRs %u SGPRs %u ISA size %u", m_vgprCount, m_sgprCount, m_isaSize);
+	}
 };
 
 /// Run the Radeon GPU Analyzer and get some info back.
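
toString() now returns the formatted string instead of filling an out-parameter, so call sites can format inline. A standalone sketch of the same shape, with std::string and snprintf standing in for AnKi's String::sprintf:

#include <cstdio>
#include <string>

// Stand-in for RgaOutput, with std::string replacing AnKi's String.
class RgaOutputSketch
{
public:
	unsigned m_vgprCount = 0;
	unsigned m_sgprCount = 0;
	unsigned m_isaSize = 0;

	// Returning by value lets callers write things like
	// lines.pushBackSprintf("AMD: %s", rgaOut.toString().cstr()) inline.
	std::string toString() const
	{
		char buf[64];
		std::snprintf(buf, sizeof(buf), "VGPRs %u SGPRs %u ISA size %u", m_vgprCount, m_sgprCount, m_isaSize);
		return buf;
	}
};

int main()
{
	const RgaOutputSketch out = {24, 48, 1024};
	std::printf("%s\n", out.toString().c_str());
	return 0;
}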

+ 425 - 42
AnKi/ShaderCompiler/ShaderDump.cpp

@@ -4,6 +4,9 @@
 // http://www.anki3d.org/LICENSE
 
 #include <AnKi/ShaderCompiler/ShaderDump.h>
+#include <AnKi/ShaderCompiler/MaliOfflineCompiler.h>
+#include <AnKi/ShaderCompiler/Dxc.h>
+#include <AnKi/ShaderCompiler/RadeonGpuAnalyzer.h>
 #include <AnKi/Util/Serializer.h>
 #include <AnKi/Util/StringList.h>
 #include <SpirvCross/spirv_glsl.hpp>
@@ -13,24 +16,329 @@ namespace anki {
 
 #define ANKI_TAB "    "
 
+class MaliOfflineCompilerStats
+{
+public:
+	F64 m_fma = 0.0;
+	F64 m_cvt = 0.0;
+	F64 m_sfu = 0.0;
+	F64 m_loadStore = 0.0;
+	F64 m_varying = 0.0;
+	F64 m_texture = 0.0;
+
+	F64 m_workRegisters = 0.0;
+	F64 m_spilling = 0.0;
+	F64 m_fp16ArithmeticPercentage = 0.0;
+
+	MaliOfflineCompilerStats() = default;
+
+	MaliOfflineCompilerStats(const MaliOfflineCompilerOut& in)
+	{
+		*this = in;
+	}
+
+	MaliOfflineCompilerStats& operator=(const MaliOfflineCompilerOut& in)
+	{
+		m_fma = F64(in.m_fma);
+		m_cvt = F64(in.m_cvt);
+		m_sfu = F64(in.m_sfu);
+		m_loadStore = F64(in.m_loadStore);
+		m_varying = F64(in.m_varying);
+		m_texture = F64(in.m_texture);
+		m_workRegisters = F64(in.m_workRegisters);
+		m_spilling = F64(in.m_spilling);
+		m_fp16ArithmeticPercentage = F64(in.m_fp16ArithmeticPercentage);
+		return *this;
+	}
+
+	MaliOfflineCompilerStats operator+(const MaliOfflineCompilerStats& b) const
+	{
+		MaliOfflineCompilerStats out;
+		out.m_fma = m_fma + b.m_fma;
+		out.m_cvt = m_cvt + b.m_cvt;
+		out.m_sfu = m_sfu + b.m_sfu;
+		out.m_loadStore = m_loadStore + b.m_loadStore;
+		out.m_varying = m_varying + b.m_varying;
+		out.m_texture = m_texture + b.m_texture;
+		out.m_workRegisters = m_workRegisters + b.m_workRegisters;
+		out.m_spilling = m_spilling + b.m_spilling;
+		out.m_fp16ArithmeticPercentage = m_fp16ArithmeticPercentage + b.m_fp16ArithmeticPercentage;
+		return out;
+	}
+
+	MaliOfflineCompilerStats operator*(F64 val) const
+	{
+		MaliOfflineCompilerStats out;
+		out.m_fma = m_fma * val;
+		out.m_cvt = m_cvt * val;
+		out.m_sfu = m_sfu * val;
+		out.m_loadStore = m_loadStore * val;
+		out.m_varying = m_varying * val;
+		out.m_texture = m_texture * val;
+		out.m_workRegisters = m_workRegisters * val;
+		out.m_spilling = m_spilling * val;
+		out.m_fp16ArithmeticPercentage = m_fp16ArithmeticPercentage * val;
+		return out;
+	}
+
+	MaliOfflineCompilerStats max(const MaliOfflineCompilerStats& b) const
+	{
+		MaliOfflineCompilerStats out;
+		out.m_fma = anki::max(m_fma, b.m_fma);
+		out.m_cvt = anki::max(m_cvt, b.m_cvt);
+		out.m_sfu = anki::max(m_sfu, b.m_sfu);
+		out.m_loadStore = anki::max(m_loadStore, b.m_loadStore);
+		out.m_varying = anki::max(m_varying, b.m_varying);
+		out.m_texture = anki::max(m_texture, b.m_texture);
+		out.m_workRegisters = anki::max(m_workRegisters, b.m_workRegisters);
+		out.m_spilling = anki::max(m_spilling, b.m_spilling);
+		out.m_fp16ArithmeticPercentage = anki::max(m_fp16ArithmeticPercentage, b.m_fp16ArithmeticPercentage);
+		return out;
+	}
+
+	String toString() const
+	{
+		String str;
+		str.sprintf("Regs %f Spilling %f FMA %f CVT %f SFU %f LS %f VAR %f TEX %f FP16 %f%%", m_workRegisters, m_spilling, m_fma, m_cvt, m_sfu,
+					m_loadStore, m_varying, m_texture, m_fp16ArithmeticPercentage);
+		return str;
+	}
+};
+
+class RgaStats
+{
+public:
+	F64 m_vgprCount = 0.0;
+	F64 m_sgprCount = 0.0;
+	F64 m_isaSize = 0.0;
+
+	RgaStats() = default;
+
+	RgaStats(const RgaOutput& b)
+	{
+		*this = b;
+	}
+
+	RgaStats& operator=(const RgaOutput& b)
+	{
+		m_vgprCount = F64(b.m_vgprCount);
+		m_sgprCount = F64(b.m_sgprCount);
+		m_isaSize = F64(b.m_isaSize);
+		return *this;
+	}
+
+	RgaStats operator+(const RgaStats& b) const
+	{
+		RgaStats out;
+		out.m_vgprCount = m_vgprCount + b.m_vgprCount;
+		out.m_sgprCount = m_sgprCount + b.m_sgprCount;
+		out.m_isaSize = m_isaSize + b.m_isaSize;
+		return out;
+	}
+
+	RgaStats operator*(F64 val) const
+	{
+		RgaStats out;
+		out.m_vgprCount = m_vgprCount * val;
+		out.m_sgprCount = m_sgprCount * val;
+		out.m_isaSize = m_isaSize * val;
+		return out;
+	}
+
+	RgaStats max(const RgaStats& b) const
+	{
+		RgaStats out;
+		out.m_vgprCount = anki::max(m_vgprCount, b.m_vgprCount);
+		out.m_sgprCount = anki::max(m_sgprCount, b.m_sgprCount);
+		out.m_isaSize = anki::max(m_isaSize, b.m_isaSize);
+		return out;
+	}
+
+	String toString() const
+	{
+		return String().sprintf("VGPRs %f SGPRs %f ISA size %f", m_vgprCount, m_sgprCount, m_isaSize);
+	}
+};
+
+class PerStageDumpStats
+{
+public:
+	class
+	{
+	public:
+		MaliOfflineCompilerStats m_mali;
+		RgaStats m_amd;
+	} m_average;
+
+	class
+	{
+	public:
+		MaliOfflineCompilerStats m_mali;
+		RgaStats m_amd;
+	} m_max;
+};
+
+class DumpStats
+{
+public:
+	Array<PerStageDumpStats, U32(ShaderType::kCount)> m_stages;
+};
+
 void dumpShaderBinary(const ShaderDumpOptions& options, const ShaderBinary& binary, ShaderCompilerString& humanReadable)
 {
 	ShaderCompilerStringList lines;
 
-	lines.pushBackSprintf("\n**BINARIES (%u)**\n", binary.m_codeBlocks.getSize());
+	lines.pushBackSprintf("# BINARIES (%u)\n\n", binary.m_codeBlocks.getSize());
+	Array<MaliOfflineCompilerStats, U32(ShaderType::kCount)> maliAverages;
+	Array<MaliOfflineCompilerStats, U32(ShaderType::kCount)> maliMaxes;
+	Array<RgaStats, U32(ShaderType::kCount)> rgaAverages;
+	Array<RgaStats, U32(ShaderType::kCount)> rgaMaxes;
+	Array<U32, U32(ShaderType::kCount)> shadersPerStage = {};
 	U32 count = 0;
+	ShaderTypeBit stagesInUse = ShaderTypeBit::kNone;
 	for(const ShaderBinaryCodeBlock& code : binary.m_codeBlocks)
 	{
-		lines.pushBackSprintf(ANKI_TAB "bin%05u \n", count++);
+		// Rewrite the SPIR-V because of the decorations we ask DXC to emit
+		Bool bRequiresMeshShaders = false;
+		DynamicArray<U8> newSpirv;
+		newSpirv.resize(code.m_binary.getSize());
+		memcpy(newSpirv.getBegin(), code.m_binary.getBegin(), code.m_binary.getSizeInBytes());
+		ShaderType shaderType = ShaderType::kCount;
+		Error visitErr = Error::kNone;
+		visitSpirv(WeakArray<U32>(reinterpret_cast<U32*>(newSpirv.getBegin()), U32(newSpirv.getSizeInBytes() / sizeof(U32))),
+				   [&](U32 cmd, WeakArray<U32> instructions) {
+					   if(cmd == spv::OpDecorate && instructions[1] == spv::DecorationDescriptorSet && instructions[2] == kDxcVkBindlessRegisterSpace)
+					   {
+						   // Bindless set, rewrite its set
+						   instructions[2] = kMaxRegisterSpaces;
+					   }
+					   else if(cmd == spv::OpCapability && instructions[0] == spv::CapabilityMeshShadingEXT)
+					   {
+						   bRequiresMeshShaders = true;
+					   }
+					   else if(cmd == spv::OpEntryPoint)
+					   {
+						   switch(instructions[0])
+						   {
+						   case spv::ExecutionModelVertex:
+							   shaderType = ShaderType::kVertex;
+							   break;
+						   case spv::ExecutionModelTessellationControl:
+							   shaderType = ShaderType::kHull;
+							   break;
+						   case spv::ExecutionModelTessellationEvaluation:
+							   shaderType = ShaderType::kDomain;
+							   break;
+						   case spv::ExecutionModelGeometry:
+							   shaderType = ShaderType::kGeometry;
+							   break;
+						   case spv::ExecutionModelTaskEXT:
+						   case spv::ExecutionModelTaskNV:
+							   shaderType = ShaderType::kAmplification;
+							   break;
+						   case spv::ExecutionModelMeshEXT:
+						   case spv::ExecutionModelMeshNV:
+							   shaderType = ShaderType::kMesh;
+							   break;
+						   case spv::ExecutionModelFragment:
+							   shaderType = ShaderType::kPixel;
+							   break;
+						   case spv::ExecutionModelGLCompute:
+							   shaderType = ShaderType::kCompute;
+							   break;
+						   case spv::ExecutionModelRayGenerationKHR:
+							   shaderType = ShaderType::kRayGen;
+							   break;
+						   case spv::ExecutionModelAnyHitKHR:
+							   shaderType = ShaderType::kAnyHit;
+							   break;
+						   case spv::ExecutionModelClosestHitKHR:
+							   shaderType = ShaderType::kClosestHit;
+							   break;
+						   case spv::ExecutionModelMissKHR:
+							   shaderType = ShaderType::kMiss;
+							   break;
+						   case spv::ExecutionModelIntersectionKHR:
+							   shaderType = ShaderType::kIntersection;
+							   break;
+						   case spv::ExecutionModelCallableKHR:
+							   shaderType = ShaderType::kCallable;
+							   break;
+						   default:
+							   ANKI_SHADER_COMPILER_LOGE("Unrecognized SPIRV execution model: %u", instructions[0]);
+							   visitErr = Error::kFunctionFailed;
+						   }
+					   }
+				   });
+
+		stagesInUse |= ShaderTypeBit(1 << shaderType);
+		++shadersPerStage[shaderType];
+
+		lines.pushBackSprintf("## bin%05u (%s)\n", count, g_shaderTypeNames[shaderType].cstr());
+
+		if(options.m_maliStats || options.m_amdStats)
+		{
+			lines.pushBack("### Stats\n");
+		}
 
-		String reflectionStr;
-		code.m_reflection.toString().join("\n" ANKI_TAB ANKI_TAB, reflectionStr);
+		if((options.m_maliStats || options.m_amdStats) && !visitErr)
+		{
+			lines.pushBack("```\n");
+		}
+
+		if(options.m_maliStats && !visitErr)
+		{
+			if((shaderType == ShaderType::kVertex || shaderType == ShaderType::kPixel || shaderType == ShaderType::kCompute) && !bRequiresMeshShaders)
+			{
+				MaliOfflineCompilerOut maliocOut;
+				const Error err = runMaliOfflineCompiler(newSpirv, shaderType, maliocOut);
+				if(err)
+				{
+					ANKI_LOGE("Mali offline compiler failed");
+					lines.pushBackSprintf("Mali: *malioc failed*  \n");
+				}
+				else
+				{
+					lines.pushBackSprintf("Mali: %s  \n", maliocOut.toString().cstr());
+					maliAverages[shaderType] = maliAverages[shaderType] + maliocOut;
+					maliMaxes[shaderType] = maliMaxes[shaderType].max(maliocOut);
+				}
+			}
+		}
+
+		if(options.m_amdStats && !visitErr)
+		{
+			if((shaderType == ShaderType::kVertex || shaderType == ShaderType::kPixel || shaderType == ShaderType::kCompute) && !bRequiresMeshShaders)
+			{
+				RgaOutput rgaOut = {};
+				const Error err = runRadeonGpuAnalyzer(newSpirv, shaderType, rgaOut);
+				if(err)
+				{
+					ANKI_LOGE("RGA failed");
+					lines.pushBackSprintf("AMD: *RGA failed*  \n");
+				}
+				else
+				{
+					lines.pushBackSprintf("AMD: %s  \n", rgaOut.toString().cstr());
+					rgaAverages[shaderType] = rgaAverages[shaderType] + rgaOut;
+					rgaMaxes[shaderType] = rgaMaxes[shaderType].max(rgaOut);
+				}
+			}
+		}
+
+		if((options.m_maliStats || options.m_amdStats) && !visitErr)
+		{
+			lines.pushBack("```\n");
+		}
 
-		lines.pushBackSprintf(ANKI_TAB ANKI_TAB "%s\n", reflectionStr.cstr());
+		String reflectionStr;
+		code.m_reflection.toString().join("\n", reflectionStr);
+		lines.pushBack("### Reflection\n");
+		lines.pushBackSprintf("```\n%s\n```\n", reflectionStr.cstr());
 
 		if(options.m_writeGlsl)
 		{
-			lines.pushBack(ANKI_TAB ANKI_TAB "----\n");
+			lines.pushBack("### GLSL\n");
 
 			spirv_cross::CompilerGLSL::Options options;
 			options.vulkan_semantics = true;
@@ -47,14 +355,14 @@ void dumpShaderBinary(const ShaderDumpOptions& options, const ShaderBinary& bina
 			StringList sourceLines;
 			sourceLines.splitString(glsl.c_str(), '\n');
 			String newGlsl;
-			sourceLines.join("\n" ANKI_TAB ANKI_TAB, newGlsl);
+			sourceLines.join("\n", newGlsl);
 
-			lines.pushBackSprintf(ANKI_TAB ANKI_TAB "%s\n", newGlsl.cstr());
+			lines.pushBackSprintf("```GLSL\n%s\n```\n", newGlsl.cstr());
 		}
 
 		if(options.m_writeSpirv)
 		{
-			lines.pushBack(ANKI_TAB ANKI_TAB "----\n");
+			lines.pushBack("### SPIR-V\n");
 
 			spv_context context = spvContextCreate(SPV_ENV_UNIVERSAL_1_5);
 
@@ -72,40 +380,50 @@ void dumpShaderBinary(const ShaderDumpOptions& options, const ShaderBinary& bina
 				spvlines.splitString(text->str, '\n');
 
 				String final;
-				spvlines.join("\n" ANKI_TAB ANKI_TAB, final);
+				spvlines.join("\n", final);
 
-				lines.pushBackSprintf(ANKI_TAB ANKI_TAB "%s\n", final.cstr());
+				lines.pushBackSprintf("```\n%s\n```\n", final.cstr());
 			}
 			else
 			{
-				lines.pushBackSprintf(ANKI_TAB ANKI_TAB "*error in spiv-dis*\n");
+				lines.pushBackSprintf("*error in spiv-dis*\n");
 			}
 
 			spvTextDestroy(text);
 		}
+
+		++count;
 	}
 
 	// Mutators
-	lines.pushBackSprintf("\n**MUTATORS (%u)**\n", binary.m_mutators.getSize());
+	lines.pushBackSprintf("\n# MUTATORS (%u)\n\n", binary.m_mutators.getSize());
 	if(binary.m_mutators.getSize() > 0)
 	{
+		lines.pushBackSprintf("| %-32s | %-18s |\n", "Name", "Values");
+		lines.pushBackSprintf("| -------------------------------- | ------------------ |\n");
+
 		for(const ShaderBinaryMutator& mutator : binary.m_mutators)
 		{
-			lines.pushBackSprintf(ANKI_TAB "%-32s values (", &mutator.m_name[0]);
+			ShaderCompilerStringList valuesStrl;
 			for(U32 i = 0; i < mutator.m_values.getSize(); ++i)
 			{
-				lines.pushBackSprintf((i < mutator.m_values.getSize() - 1) ? "%d," : "%d)", mutator.m_values[i]);
+				valuesStrl.pushBackSprintf("%d", mutator.m_values[i]);
 			}
-			lines.pushBack("\n");
+			ShaderCompilerString valuesStr;
+			valuesStrl.join(", ", valuesStr);
+
+			lines.pushBackSprintf("| %-32s | %-18s |\n", &mutator.m_name[0], valuesStr.cstr());
 		}
 	}
 
 	// Techniques
-	lines.pushBackSprintf("\n**TECHNIQUES (%u)**\n", binary.m_techniques.getSize());
+	lines.pushBackSprintf("\n# TECHNIQUES (%u)\n\n", binary.m_techniques.getSize());
+	lines.pushBackSprintf("| %-32s | %-12s |\n", "Name", "Shader Types");
+	lines.pushBackSprintf("| -------------------------------- | ------------ |\n");
 	count = 0;
 	for(const ShaderBinaryTechnique& t : binary.m_techniques)
 	{
-		lines.pushBackSprintf(ANKI_TAB "%-32s shaderTypes 0x%02x\n", t.m_name.getBegin(), U32(t.m_shaderTypes));
+		lines.pushBackSprintf("| %-32s | 0x%010x |\n", t.m_name.getBegin(), U32(t.m_shaderTypes));
 	}
 
 	// Mutations
@@ -115,79 +433,144 @@ void dumpShaderBinary(const ShaderDumpOptions& options, const ShaderBinary& bina
 		skippedMutations += mutation.m_variantIndex == kMaxU32;
 	}
 
-	lines.pushBackSprintf("\n**MUTATIONS (%u skipped %u)**\n", binary.m_mutations.getSize(), skippedMutations);
+	lines.pushBackSprintf("\n# MUTATIONS (total %u, skipped %u)\n\n", binary.m_mutations.getSize(), skippedMutations);
+	lines.pushBackSprintf("| %-8s | %-8s | %-18s |", "Mutation", "Variant", "Hash");
+
+	if(binary.m_mutators.getSize() > 0)
+	{
+		for(U32 i = 0; i < binary.m_mutators.getSize(); ++i)
+		{
+			lines.pushBackSprintf(" %-32s |", binary.m_mutators[i].m_name.getBegin());
+		}
+	}
+	lines.pushBack("\n");
+
+	lines.pushBackSprintf("| -------- | -------- | ------------------ |");
+	if(binary.m_mutators.getSize() > 0)
+	{
+		for(U32 i = 0; i < binary.m_mutators.getSize(); ++i)
+		{
+			lines.pushBackSprintf(" -------------------------------- |");
+		}
+	}
+	lines.pushBack("\n");
+
 	count = 0;
 	for(const ShaderBinaryMutation& mutation : binary.m_mutations)
 	{
 		if(mutation.m_variantIndex != kMaxU32)
 		{
-			lines.pushBackSprintf(ANKI_TAB "mut%05u variantIndex var%05u hash 0x%016" PRIX64 " values (", count++, mutation.m_variantIndex,
-								  mutation.m_hash);
+			lines.pushBackSprintf("| mut%05u | var%05u | 0x%016" PRIX64 " | ", count++, mutation.m_variantIndex, mutation.m_hash);
 		}
 		else
 		{
-			lines.pushBackSprintf(ANKI_TAB "mut%05u variantIndex N/A      hash 0x%016" PRIX64 " values (", count++, mutation.m_hash);
+			lines.pushBackSprintf("| mut%05u | -        | 0x%016" PRIX64 " | ", count++, mutation.m_hash);
 		}
 
 		if(mutation.m_values.getSize() > 0)
 		{
 			for(U32 i = 0; i < mutation.m_values.getSize(); ++i)
 			{
-				lines.pushBackSprintf((i < mutation.m_values.getSize() - 1) ? "%s %4d, " : "%s %4d", binary.m_mutators[i].m_name.getBegin(),
-									  I32(mutation.m_values[i]));
+				lines.pushBackSprintf("%-32d | ", I32(mutation.m_values[i]));
 			}
-
-			lines.pushBack(")");
-		}
-		else
-		{
-			lines.pushBack("N/A)");
 		}
 
 		lines.pushBack("\n");
 	}
 
 	// Variants
-	lines.pushBackSprintf("\n**SHADER VARIANTS (%u)**\n", binary.m_variants.getSize());
+	lines.pushBackSprintf("\n# SHADER VARIANTS (%u)\n\n", binary.m_variants.getSize());
+	lines.pushBackSprintf("| Variant  | %-32s | ", "Technique");
+	for(ShaderType s : EnumBitsIterable<ShaderType, ShaderTypeBit>(stagesInUse))
+	{
+		lines.pushBackSprintf("%-13s | ", g_shaderTypeNames[s].cstr());
+	}
+	lines.pushBack("\n");
+	lines.pushBackSprintf("| -------- | %s |", ShaderCompilerString('-', 32).cstr());
+	for([[maybe_unused]] ShaderType s : EnumBitsIterable<ShaderType, ShaderTypeBit>(stagesInUse))
+	{
+		lines.pushBackSprintf(" %s |", ShaderCompilerString('-', 13).cstr());
+	}
+	lines.pushBack("\n");
+
 	count = 0;
 	for(const ShaderBinaryVariant& variant : binary.m_variants)
 	{
-		lines.pushBackSprintf(ANKI_TAB "var%05u\n", count++);
-
 		// Binary indices
 		for(U32 t = 0; t < binary.m_techniques.getSize(); ++t)
 		{
-			lines.pushBackSprintf(ANKI_TAB ANKI_TAB "%-32s binaries (", binary.m_techniques[t].m_name.getBegin());
+			if(t == 0)
+			{
+				lines.pushBackSprintf("| var%05u | ", count);
+			}
+			else
+			{
+				lines.pushBack("|          | ");
+			}
+			lines.pushBackSprintf("%-32s | ", binary.m_techniques[t].m_name.getBegin());
 
-			for(ShaderType s : EnumIterable<ShaderType>())
+			for(ShaderType s : EnumBitsIterable<ShaderType, ShaderTypeBit>(stagesInUse))
 			{
 				if(variant.m_techniqueCodeBlocks[t].m_codeBlockIndices[s] < kMaxU32)
 				{
-					lines.pushBackSprintf("bin%05u", variant.m_techniqueCodeBlocks[t].m_codeBlockIndices[s]);
+					lines.pushBackSprintf("bin%05u      | ", variant.m_techniqueCodeBlocks[t].m_codeBlockIndices[s]);
 				}
 				else
 				{
-					lines.pushBack("--------");
+					lines.pushBackSprintf("-%s | ", ShaderCompilerString(' ', 12).cstr());
 				}
-
-				lines.pushBack((s == ShaderType::kCount - 1) ? ")\n" : ", ");
 			}
+
+			lines.pushBack("\n");
 		}
+
+		++count;
 	}
 
 	// Structs
-	lines.pushBackSprintf("\n**STRUCTS (%u)**\n", binary.m_structs.getSize());
+	lines.pushBackSprintf("\n# STRUCTS (%u)\n\n", binary.m_structs.getSize());
 	if(binary.m_structs.getSize() > 0)
 	{
 		for(const ShaderBinaryStruct& s : binary.m_structs)
 		{
-			lines.pushBackSprintf(ANKI_TAB "%-32s size %4u\n", s.m_name.getBegin(), s.m_size);
+			lines.pushBack("```C++\n");
+			lines.pushBackSprintf("struct %s // size: %u\n", s.m_name.getBegin(), s.m_size);
+			lines.pushBack("{\n");
 
 			for(const ShaderBinaryStructMember& member : s.m_members)
 			{
 				const CString typeStr = getShaderVariableDataTypeInfo(member.m_type).m_name;
-				lines.pushBackSprintf(ANKI_TAB ANKI_TAB "%-32s type %5s offset %4d\n", member.m_name.getBegin(), typeStr.cstr(), member.m_offset);
+				lines.pushBackSprintf(ANKI_TAB "%5s %s; // offset: %u\n", typeStr.cstr(), member.m_name.getBegin(), member.m_offset);
+			}
+			lines.pushBack("};\n");
+			lines.pushBack("```\n");
+		}
+	}
+
+	// Stats
+	if(options.m_maliStats || options.m_amdStats)
+	{
+		lines.pushBackSprintf("\n# COMPILER STATS\n\n");
+
+		for(ShaderType s : EnumBitsIterable<ShaderType, ShaderTypeBit>(stagesInUse))
+		{
+			lines.pushBackSprintf("%s\n```\n", g_shaderTypeNames[s].cstr());
+
+			if(options.m_maliStats)
+			{
+				maliAverages[s] = maliAverages[s] * (1.0 / F64(shadersPerStage[s]));
+				lines.pushBackSprintf("Mali avg: %s  \n", maliAverages[s].toString().cstr());
+				lines.pushBackSprintf("Mali max: %s  \n", maliMaxes[s].toString().cstr());
 			}
+
+			if(options.m_amdStats)
+			{
+				rgaAverages[s] = rgaAverages[s] * (1.0 / F64(shadersPerStage[s]));
+				lines.pushBackSprintf("AMD avg: %s  \n", rgaAverages[s].toString().cstr());
+				lines.pushBackSprintf("AMD max: %s  \n", rgaMaxes[s].toString().cstr());
+			}
+
+			lines.pushBack("```\n");
 		}
 	}
 

+ 2 - 0
AnKi/ShaderCompiler/ShaderDump.h

@@ -16,6 +16,8 @@ class ShaderDumpOptions
 public:
 	Bool m_writeGlsl = true;
 	Bool m_writeSpirv = false;
+	Bool m_maliStats = false;
+	Bool m_amdStats = false;
 };
 
 /// Create a human readable representation of the shader binary.
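
A hypothetical call site for the two new flags, mirroring how the tool's dump() below wires them up (loading and error handling of the ShaderBinary are elided):

ShaderDumpOptions options;
options.m_writeGlsl = true;
options.m_writeSpirv = false;
options.m_maliStats = options.m_amdStats = true; // Collect both vendors' stats

ShaderCompilerString txt;
dumpShaderBinary(options, *binary, txt); // 'binary' assumed deserialized already
printf("%s\n", txt.cstr());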

+ 1 - 1
AnKi/Shaders/Reflections.ankiprog

@@ -311,7 +311,7 @@ Bool doSsr(UVec2 logicalViewportSize, UVec2 realCoord, UVec2 logicalCoord, Vec2
 	F32 hitDepth;
 	{
 		RayMarchingConfig config = (RayMarchingConfig)0;
-		config.m_maxIterations = g_consts.m_ssrStepIncrement;
+		config.m_maxIterations = g_consts.m_ssrMaxIterations;
 		config.m_depthTextureLod = 8.0; // Use the max LOD for ray marching
 		config.m_stepIncrement = g_consts.m_ssrStepIncrement;
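
This fixes a copy-paste bug: the iteration cap was being set from the step increment, so with the defaults in Reflections.h the march stopped after 32 iterations instead of the configured 64. A minimal C++ rendering of the loop structure (the real code is HLSL in this file; names are illustrative):

#include <cstdio>

// maxIterations bounds how many samples the march may take; stepIncrement is
// how far the ray advances per sample. Conflating them (the old bug) caps the
// march at the step size rather than the configured iteration count.
static float rayMarch(unsigned maxIterations, unsigned stepIncrement)
{
	float coord = 0.0f;
	for(unsigned i = 0; i < maxIterations; ++i)
	{
		coord += float(stepIncrement); // Advance along the ray
		// ... depth-buffer hit test would go here ...
	}
	return coord;
}

int main()
{
	// Desktop defaults from Reflections.h: 64 iterations, step increment 32
	std::printf("marched %f texels\n", rayMarch(64, 32));
	return 0;
}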
 

+ 1 - 273
Tools/Shader/ShaderProgramBinaryDumpMain.cpp

@@ -5,9 +5,6 @@
 
 #include <AnKi/ShaderCompiler/ShaderCompiler.h>
 #include <AnKi/ShaderCompiler/ShaderDump.h>
-#include <AnKi/ShaderCompiler/MaliOfflineCompiler.h>
-#include <AnKi/ShaderCompiler/RadeonGpuAnalyzer.h>
-#include <AnKi/ShaderCompiler/Dxc.h>
 #include <AnKi/Util/ThreadHive.h>
 #include <AnKi/Util/System.h>
 #include <ThirdParty/SpirvCross/spirv.hpp>
@@ -138,271 +135,6 @@ static Error parseCommandLineArgs(WeakArray<char*> argv, Bool& dumpStats, Bool&
 	return Error::kNone;
 }
 
-Error dumpStats(const ShaderBinary& bin)
-{
-	printf("\nOffline compilers stats:\n");
-	fflush(stdout);
-
-	class Stats
-	{
-	public:
-		class
-		{
-		public:
-			F64 m_fma;
-			F64 m_cvt;
-			F64 m_sfu;
-			F64 m_loadStore;
-			F64 m_varying;
-			F64 m_texture;
-			F64 m_workRegisters;
-			F64 m_fp16ArithmeticPercentage;
-			F64 m_spillingCount;
-		} m_arm;
-
-		class
-		{
-		public:
-			F64 m_vgprCount;
-			F64 m_sgprCount;
-			F64 m_isaSize;
-		} m_amd;
-
-		Stats(F64 v)
-		{
-			m_arm.m_fma = m_arm.m_cvt = m_arm.m_sfu = m_arm.m_loadStore = m_arm.m_varying = m_arm.m_texture = m_arm.m_workRegisters =
-				m_arm.m_fp16ArithmeticPercentage = m_arm.m_spillingCount = v;
-
-			m_amd.m_vgprCount = m_amd.m_sgprCount = m_amd.m_isaSize = v;
-		}
-
-		Stats()
-			: Stats(0.0)
-		{
-		}
-
-		void op(const Stats& b, void (*func)(F64& a, F64 b))
-		{
-			func(m_arm.m_fma, b.m_arm.m_fma);
-			func(m_arm.m_cvt, b.m_arm.m_cvt);
-			func(m_arm.m_sfu, b.m_arm.m_sfu);
-			func(m_arm.m_loadStore, b.m_arm.m_loadStore);
-			func(m_arm.m_varying, b.m_arm.m_varying);
-			func(m_arm.m_texture, b.m_arm.m_texture);
-			func(m_arm.m_workRegisters, b.m_arm.m_workRegisters);
-			func(m_arm.m_fp16ArithmeticPercentage, b.m_arm.m_fp16ArithmeticPercentage);
-			func(m_arm.m_spillingCount, b.m_arm.m_spillingCount);
-
-			func(m_amd.m_vgprCount, b.m_amd.m_vgprCount);
-			func(m_amd.m_sgprCount, b.m_amd.m_sgprCount);
-			func(m_amd.m_isaSize, b.m_amd.m_isaSize);
-		}
-	};
-
-	class StageStats
-	{
-	public:
-		Stats m_avgStats{0.0};
-		Stats m_maxStats{-1.0};
-		Stats m_minStats{kMaxF64};
-		U32 m_spillingCount = 0;
-		U32 m_count = 0;
-	};
-
-	class Ctx
-	{
-	public:
-		DynamicArray<Stats> m_spirvStats;
-		DynamicArray<Atomic<U32>> m_spirvVisited;
-		Atomic<U32> m_variantCount = {0};
-		const ShaderBinary* m_bin = nullptr;
-		Atomic<I32> m_error = {0};
-	};
-
-	Ctx ctx;
-	ctx.m_bin = &bin;
-	ctx.m_spirvStats.resize(bin.m_codeBlocks.getSize());
-	ctx.m_spirvVisited.resize(bin.m_codeBlocks.getSize(), 0);
-	memset(ctx.m_spirvVisited.getBegin(), 0, ctx.m_spirvVisited.getSizeInBytes());
-
-	ThreadHive hive(getCpuCoresCount());
-
-	ThreadHiveTaskCallback callback = [](void* userData, [[maybe_unused]] U32 threadId, [[maybe_unused]] ThreadHive& hive,
-										 [[maybe_unused]] ThreadHiveSemaphore* signalSemaphore) {
-		Ctx& ctx = *static_cast<Ctx*>(userData);
-		U32 variantIdx;
-
-		while((variantIdx = ctx.m_variantCount.fetchAdd(1)) < ctx.m_bin->m_variants.getSize() && ctx.m_error.load() == 0)
-		{
-			const ShaderBinaryVariant& variant = ctx.m_bin->m_variants[variantIdx];
-
-			for(U32 t = 0; t < variant.m_techniqueCodeBlocks.getSize(); ++t)
-			{
-				for(ShaderType shaderType : EnumBitsIterable<ShaderType, ShaderTypeBit>(ctx.m_bin->m_techniques[t].m_shaderTypes))
-				{
-					const U32 codeblockIdx = variant.m_techniqueCodeBlocks[t].m_codeBlockIndices[shaderType];
-
-					const Bool visited = ctx.m_spirvVisited[codeblockIdx].fetchAdd(1) != 0;
-					if(visited)
-					{
-						continue;
-					}
-
-					const ShaderBinaryCodeBlock& codeBlock = ctx.m_bin->m_codeBlocks[codeblockIdx];
-
-					// Rewrite spir-v because of the decorations we ask DXC to put
-					Bool bRequiresMeshShaders = false;
-					DynamicArray<U8> newSpirv;
-					newSpirv.resize(codeBlock.m_binary.getSize());
-					memcpy(newSpirv.getBegin(), codeBlock.m_binary.getBegin(), codeBlock.m_binary.getSizeInBytes());
-					visitSpirv(WeakArray<U32>(reinterpret_cast<U32*>(newSpirv.getBegin()), U32(newSpirv.getSizeInBytes() / sizeof(U32))),
-							   [&](U32 cmd, WeakArray<U32> instructions) {
-								   if(cmd == spv::OpDecorate && instructions[1] == spv::DecorationDescriptorSet
-									  && instructions[2] == kDxcVkBindlessRegisterSpace)
-								   {
-									   // Bindless set, rewrite its set
-									   instructions[2] = kMaxRegisterSpaces;
-								   }
-								   else if(cmd == spv::OpCapability && instructions[0] == spv::CapabilityMeshShadingEXT)
-								   {
-									   bRequiresMeshShaders = true;
-								   }
-							   });
-
-					// Arm stats
-					MaliOfflineCompilerOut maliocOut;
-					Error err = Error::kNone;
-
-					if((shaderType == ShaderType::kVertex || shaderType == ShaderType::kPixel || shaderType == ShaderType::kCompute)
-					   && !bRequiresMeshShaders)
-					{
-						err = runMaliOfflineCompiler(newSpirv, shaderType, maliocOut);
-
-						if(err)
-						{
-							ANKI_LOGE("Mali offline compiler failed");
-							ctx.m_error.store(1);
-							break;
-						}
-					}
-
-					// AMD
-					RgaOutput rgaOut = {};
-#if 1
-					if((shaderType == ShaderType::kVertex || shaderType == ShaderType::kPixel || shaderType == ShaderType::kCompute)
-					   && !bRequiresMeshShaders)
-					{
-						err = runRadeonGpuAnalyzer(newSpirv, shaderType, rgaOut);
-						if(err)
-						{
-							ANKI_LOGE("Radeon GPU Analyzer compiler failed");
-							ctx.m_error.store(1);
-							break;
-						}
-					}
-#endif
-
-					// Write stats
-					Stats& stats = ctx.m_spirvStats[codeblockIdx];
-
-					stats.m_arm.m_fma = maliocOut.m_fma;
-					stats.m_arm.m_cvt = maliocOut.m_cvt;
-					stats.m_arm.m_sfu = maliocOut.m_sfu;
-					stats.m_arm.m_loadStore = maliocOut.m_loadStore;
-					stats.m_arm.m_varying = maliocOut.m_varying;
-					stats.m_arm.m_texture = maliocOut.m_texture;
-					stats.m_arm.m_workRegisters = maliocOut.m_workRegisters;
-					stats.m_arm.m_fp16ArithmeticPercentage = maliocOut.m_fp16ArithmeticPercentage;
-					stats.m_arm.m_spillingCount = (maliocOut.m_spilling) ? 1.0 : 0.0;
-
-					stats.m_amd.m_vgprCount = F64(rgaOut.m_vgprCount);
-					stats.m_amd.m_sgprCount = F64(rgaOut.m_sgprCount);
-					stats.m_amd.m_isaSize = F64(rgaOut.m_isaSize);
-				}
-
-				if(variantIdx > 0 && ((variantIdx + 1) % 32) == 0)
-				{
-					printf("Processed %u out of %u variants\n", variantIdx + 1, ctx.m_bin->m_variants.getSize());
-				}
-			}
-		} // while
-	};
-
-	for(U32 i = 0; i < hive.getThreadCount(); ++i)
-	{
-		hive.submitTask(callback, &ctx);
-	}
-
-	hive.waitAllTasks();
-
-	if(ctx.m_error.load() != 0)
-	{
-		return Error::kFunctionFailed;
-	}
-
-	// Cather the results
-	Array<StageStats, U32(ShaderType::kCount)> allStageStats;
-	for(const ShaderBinaryVariant& variant : bin.m_variants)
-	{
-		for(U32 t = 0; t < variant.m_techniqueCodeBlocks.getSize(); ++t)
-		{
-			for(ShaderType shaderType : EnumBitsIterable<ShaderType, ShaderTypeBit>(ctx.m_bin->m_techniques[t].m_shaderTypes))
-			{
-				const U32 codeblockIdx = variant.m_techniqueCodeBlocks[t].m_codeBlockIndices[shaderType];
-
-				const Stats& stats = ctx.m_spirvStats[codeblockIdx];
-				StageStats& allStats = allStageStats[shaderType];
-
-				++allStats.m_count;
-
-				allStats.m_avgStats.op(stats, [](F64& a, F64 b) {
-					a += b;
-				});
-
-				allStats.m_minStats.op(stats, [](F64& a, F64 b) {
-					a = min(a, b);
-				});
-
-				allStats.m_maxStats.op(stats, [](F64& a, F64 b) {
-					a = max(a, b);
-				});
-			}
-		}
-	}
-
-	// Print
-	for(ShaderType shaderType : EnumIterable<ShaderType>())
-	{
-		const StageStats& stage = allStageStats[shaderType];
-		if(stage.m_count == 0)
-		{
-			continue;
-		}
-
-		printf("Stage %u\n", U32(shaderType));
-		printf("  Arm shaders spilling regs %u\n", stage.m_spillingCount);
-
-		const F64 countf = F64(stage.m_count);
-
-		const Stats& avg = stage.m_avgStats;
-		printf("  Average:\n");
-		printf("    Arm: Regs %f FMA %f CVT %f SFU %f LS %f VAR %f TEX %f FP16 %f%%\n", avg.m_arm.m_workRegisters / countf, avg.m_arm.m_fma / countf,
-			   avg.m_arm.m_cvt / countf, avg.m_arm.m_sfu / countf, avg.m_arm.m_loadStore / countf, avg.m_arm.m_varying / countf,
-			   avg.m_arm.m_texture / countf, avg.m_arm.m_fp16ArithmeticPercentage / countf);
-		printf("    AMD: VGPR %f SGPR %f ISA size %f\n", avg.m_amd.m_vgprCount / countf, avg.m_amd.m_sgprCount / countf,
-			   avg.m_amd.m_isaSize / countf);
-
-		const Stats& maxs = stage.m_maxStats;
-		printf("  Max:\n");
-		printf("    Arm: Regs %f FMA %f CVT %f SFU %f LS %f VAR %f TEX %f FP16 %f%%\n", maxs.m_arm.m_workRegisters, maxs.m_arm.m_fma,
-			   maxs.m_arm.m_cvt, maxs.m_arm.m_sfu, maxs.m_arm.m_loadStore, maxs.m_arm.m_varying, maxs.m_arm.m_texture,
-			   maxs.m_arm.m_fp16ArithmeticPercentage);
-		printf("    AMD: VGPR %f SGPR %f ISA size %f\n", maxs.m_amd.m_vgprCount, maxs.m_amd.m_sgprCount, maxs.m_amd.m_isaSize);
-	}
-
-	return Error::kNone;
-}
-
 Error dump(CString fname, Bool bDumpStats, Bool dumpBinary, Bool glsl, Bool spirv)
 {
 	ShaderBinary* binary;
@@ -424,6 +156,7 @@ Error dump(CString fname, Bool bDumpStats, Bool dumpBinary, Bool glsl, Bool spir
 		ShaderDumpOptions options;
 		options.m_writeGlsl = glsl;
 		options.m_writeSpirv = spirv;
+		options.m_maliStats = options.m_amdStats = bDumpStats;
 
 		ShaderCompilerString txt;
 		dumpShaderBinary(options, *binary, txt);
@@ -431,11 +164,6 @@ Error dump(CString fname, Bool bDumpStats, Bool dumpBinary, Bool glsl, Bool spir
 		printf("%s\n", txt.cstr());
 	}
 
-	if(bDumpStats)
-	{
-		ANKI_CHECK(dumpStats(*binary));
-	}
-
 	return Error::kNone;
 }