Browse Source

Renderer: Start using the DrawIndex for some rendering

Panagiotis Christopoulos Charitos 1 month ago
parent
commit
6ce99d8ce1
43 changed files with 300 additions and 270 deletions
  1. 3 3
      AnKi/Core/CoreTracer.cpp
  2. 2 6
      AnKi/Editor/EditorUi.cpp
  3. 5 0
      AnKi/Gr/Buffer.h
  4. 1 1
      AnKi/Renderer/IndirectDiffuse.cpp
  5. 2 2
      AnKi/Renderer/IndirectDiffuseClipmaps.cpp
  6. 1 1
      AnKi/Renderer/Reflections.cpp
  7. 32 6
      AnKi/Renderer/Renderer.cpp
  8. 12 4
      AnKi/Renderer/Renderer.h
  9. 1 1
      AnKi/Renderer/RtMaterialFetchDbg.cpp
  10. 1 1
      AnKi/Renderer/RtShadows.cpp
  11. 17 23
      AnKi/Renderer/Utils/Drawer.cpp
  12. 4 2
      AnKi/Renderer/Utils/Drawer.h
  13. 15 12
      AnKi/Renderer/Utils/GpuVisibility.cpp
  14. 41 55
      AnKi/Renderer/Utils/GpuVisibility.h
  15. 2 1
      AnKi/Resource/MeshResource.cpp
  16. 0 2
      AnKi/Resource/ParticleEmitterResource2.h
  17. 0 1
      AnKi/Scene/Components/MaterialComponent.cpp
  18. 0 1
      AnKi/Scene/Components/ParticleEmitterComponent.cpp
  19. 1 1
      AnKi/Scene/Components/SceneComponent.h
  20. 1 3
      AnKi/Scene/RenderStateBucket.cpp
  21. 0 1
      AnKi/Scene/RenderStateBucket.h
  22. 4 5
      AnKi/Shaders/ForwardShadingFog.ankiprog
  23. 3 3
      AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog
  24. 6 8
      AnKi/Shaders/ForwardShadingParticles.ankiprog
  25. 11 11
      AnKi/Shaders/GBufferGeneric.ankiprog
  26. 2 3
      AnKi/Shaders/GBufferGpuParticles.ankiprog
  27. 8 6
      AnKi/Shaders/GpuVisibilityStage1.ankiprog
  28. 31 41
      AnKi/Shaders/GpuVisibilityStage2And3.ankiprog
  29. 0 10
      AnKi/Shaders/Include/GpuSceneFunctions.h
  30. 8 6
      AnKi/Shaders/Include/GpuSceneTypes.h
  31. 8 3
      AnKi/Shaders/Include/GpuVisibilityTypes.h
  32. 11 8
      AnKi/Shaders/Include/MaterialTypes.h
  33. 2 1
      AnKi/Shaders/Include/MeshTypes.h
  34. 6 6
      AnKi/Shaders/Include/UnifiedGeometryTypes.def.h
  35. 16 3
      AnKi/Shaders/MaterialShadersCommon.hlsl
  36. 17 13
      AnKi/Shaders/RtSbtBuild.ankiprog
  37. 1 1
      AnKi/Shaders/RtShadowsSbtBuild.ankiprog
  38. 4 0
      AnKi/Util/CVarSet.h
  39. 12 6
      AnKi/Window/InputAndroid.cpp
  40. 2 2
      AnKi/Window/InputSdl.cpp
  41. 5 5
      AnKi/Window/NativeWindowAndroid.cpp
  42. 1 1
      AnKi/Window/NativeWindowAndroid.h
  43. 1 1
      Sandbox/Main.cpp

+ 3 - 3
AnKi/Core/CoreTracer.cpp

@@ -100,7 +100,7 @@ Error CoreTracer::init(CString directory)
 #	if ANKI_OS_ANDROID
 	if(Tracer::getSingleton().getStreamlineEnabled())
 	{
-		Tracer::getSingleton().setStreamlineEnabled(g_cvarCoreStreamlineEnabled);
+		Tracer::getSingleton().setStreamlineEnabled(g_cvarCoreStreamlineAnnotations);
 	}
 #	endif
 
@@ -335,9 +335,9 @@ void CoreTracer::flushFrame(U64 frame)
 	}
 
 #	if ANKI_OS_ANDROID
-	if(Tracer::getSingleton().getStreamlineEnabled() != g_cvarCoreStreamlineEnabled)
+	if(Tracer::getSingleton().getStreamlineEnabled() != g_cvarCoreStreamlineAnnotations)
 	{
-		Tracer::getSingleton().setStreamlineEnabled(g_cvarCoreStreamlineEnabled);
+		Tracer::getSingleton().setStreamlineEnabled(g_cvarCoreStreamlineAnnotations);
 	}
 #	endif
 }

+ 2 - 6
AnKi/Editor/EditorUi.cpp

@@ -182,7 +182,6 @@ void EditorUi::draw(UiCanvas& canvas)
 
 	{
 		const Vec2 viewportSize = ImGui::GetMainViewport()->WorkSize;
-		const Vec2 viewportPos = ImGui::GetMainViewport()->WorkPos;
 		const Vec2 initialSize = Vec2(viewportSize.y() * 0.75f);
 		const Vec2 initialPos = (viewportSize - initialSize) / 2.0f;
 
@@ -541,6 +540,8 @@ void EditorUi::sceneNodePropertiesWindow()
 		icon = ANKI_CONCATENATE(ICON_MDI_, icon_); \
 		break;
 #include <AnKi/Scene/Components/SceneComponentClasses.def.h>
+					default:
+						ANKI_ASSERT(0);
 					}
 
 					// Header
@@ -712,7 +713,6 @@ void EditorUi::materialComponent(MaterialComponent& comp)
 		dummyButton(0);
 
 		I32 value = comp.getSubmeshIndex();
-		Char txt[100] = "lala";
 		if(ImGui::InputInt(ICON_MDI_VECTOR_POLYGON " Submesh ID", &value, 1, 1, 0))
 		{
 			comp.setSubmeshIndex(value);
@@ -808,8 +808,6 @@ void EditorUi::cVarsWindow()
 	if(ImGui::GetFrameCount() > 1)
 	{
 		// Viewport is one frame delay so do that when frame >1
-		const Vec2 viewportSize = ImGui::GetMainViewport()->WorkSize;
-		const Vec2 viewportPos = ImGui::GetMainViewport()->WorkPos;
 		const Vec2 initialSize = Vec2(900.0f, m_canvas->getSizef().y() * 0.8f);
 		ImGui::SetNextWindowSize(initialSize, ImGuiCond_FirstUseEver);
 		ImGui::SetNextWindowPos(ImGui::GetMainViewport()->GetCenter(), ImGuiCond_Once, Vec2(0.5f));
@@ -931,8 +929,6 @@ void EditorUi::debugRtsWindow()
 	if(ImGui::GetFrameCount() > 1)
 	{
 		// Viewport is one frame delay so do that when frame >1
-		const Vec2 viewportSize = ImGui::GetMainViewport()->WorkSize;
-		const Vec2 viewportPos = ImGui::GetMainViewport()->WorkPos;
 		const Vec2 initialSize = Vec2(450.0f, m_canvas->getSizef().y() * 0.4f);
 		ImGui::SetNextWindowSize(initialSize, ImGuiCond_FirstUseEver);
 		ImGui::SetNextWindowPos(ImGui::GetMainViewport()->GetCenter(), ImGuiCond_Once, Vec2(0.5f));

+ 5 - 0
AnKi/Gr/Buffer.h

@@ -154,6 +154,11 @@ public:
 
 	BufferView& operator=(const BufferView&) = default;
 
+	explicit operator Bool() const
+	{
+		return isValid();
+	}
+
 	[[nodiscard]] Buffer& getBuffer() const
 	{
 		validate();

+ 1 - 1
AnKi/Renderer/IndirectDiffuse.cpp

@@ -148,7 +148,7 @@ void IndirectDiffuse::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
 		reg, 0, \

+ 2 - 2
AnKi/Renderer/IndirectDiffuseClipmaps.cpp

@@ -482,7 +482,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
 				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 
-				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
+				cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
 		reg, 0, \
@@ -701,7 +701,7 @@ void IndirectDiffuseClipmaps::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
 		reg, 0, \

+ 1 - 1
AnKi/Renderer/Reflections.cpp

@@ -302,7 +302,7 @@ void Reflections::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
 		reg, 0, \

+ 32 - 6
AnKi/Renderer/Renderer.cpp

@@ -128,13 +128,39 @@ Error Renderer::initInternal(const RendererInitInfo& inf)
 	m_rgraph = GrManager::getSingleton().newRenderGraph();
 
 	// Set from the config
-	m_postProcessResolution = UVec2(Vec2(m_swapchainResolution) * g_cvarRenderRenderScaling);
-	alignRoundDown(2, m_postProcessResolution.x());
-	alignRoundDown(2, m_postProcessResolution.y());
+	auto setResolution = [&](UVec2 baseResolution, F32 scale) {
+		UVec2 out;
+		if(scale == 540.0f)
+		{
+			out = UVec2(960, 540);
+		}
+		else if(scale == 720.0f)
+		{
+			out = UVec2(1280, 720);
+		}
+		else if(scale == 1080.0f)
+		{
+			out = UVec2(1920, 1080);
+		}
+		else if(scale == 1440.0f)
+		{
+			out = UVec2(2560, 1440);
+		}
+		else if(scale == 2160.0f)
+		{
+			out = UVec2(3840, 2160);
+		}
+		else
+		{
+			out = UVec2(Vec2(baseResolution) * scale);
+			alignRoundDown(2, out.x());
+			alignRoundDown(2, out.y());
+		}
+		return out;
+	};
 
-	m_internalResolution = UVec2(Vec2(m_postProcessResolution) * g_cvarRenderInternalRenderScaling);
-	alignRoundDown(2, m_internalResolution.x());
-	alignRoundDown(2, m_internalResolution.y());
+	m_postProcessResolution = setResolution(m_swapchainResolution, g_cvarRenderRenderScaling);
+	m_internalResolution = setResolution(m_postProcessResolution, g_cvarRenderInternalRenderScaling);
 
 	ANKI_R_LOGI("Initializing offscreen renderer. Resolution %ux%u. Internal resolution %ux%u", m_postProcessResolution.x(),
 				m_postProcessResolution.y(), m_internalResolution.x(), m_internalResolution.y());

+ 12 - 4
AnKi/Renderer/Renderer.h

@@ -15,10 +15,18 @@ namespace anki {
 
 /// @addtogroup renderer
 /// @{
-ANKI_CVAR(NumericCVar<F32>, Render, InternalRenderScaling, 1.0f, 0.5f, 1.0f,
-		  "A factor over the requested swapchain resolution. Applies to all passes up to TAA")
-ANKI_CVAR(NumericCVar<F32>, Render, RenderScaling, 1.0f, 0.5f, 8.0f,
-		  "A factor over the requested swapchain resolution. Applies to post-processing and UI")
+ANKI_CVAR(
+	NumericCVar<F32>, Render, InternalRenderScaling, 1.0f,
+	[](F32 value) {
+		return (value > 0.1f && value <= 8.0f) || value == 540.0f || value == 720.0f || value == 1080.0f || value == 1440.0f || value == 2160.0f;
+	},
+	"A factor over the requested swapchain resolution or some common resolution values (eg 1080, 720 etc). Applies to all passes up to TAA")
+ANKI_CVAR(
+	NumericCVar<F32>, Render, RenderScaling, 1.0f,
+	[](F32 value) {
+		return (value > 0.1f && value <= 8.0f) || value == 540.0f || value == 720.0f || value == 1080.0f || value == 1440.0f || value == 2160.0f;
+	},
+	"A factor over the requested swapchain resolution. Applies to post-processing and UI")
 ANKI_CVAR(NumericCVar<U32>, Render, ZSplitCount, 64, 8, kMaxZsplitCount, "Clusterer number of Z splits")
 ANKI_CVAR(NumericCVar<U8>, Render, TextureAnisotropy, (ANKI_PLATFORM_MOBILE) ? 1 : 16, 1, 16, "Texture anisotropy for the main passes")
 ANKI_CVAR(BoolCVar, Render, PreferCompute, !ANKI_PLATFORM_MOBILE, "Prefer compute shaders")

+ 1 - 1
AnKi/Renderer/RtMaterialFetchDbg.cpp

@@ -86,7 +86,7 @@ void RtMaterialFetchDbg::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESH_LODS, 0, GpuSceneArrays::MeshLod::getSingleton().getBufferView());
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
 		reg, 0, \

+ 1 - 1
AnKi/Renderer/RtShadows.cpp

@@ -297,7 +297,7 @@ void RtShadows::populateRenderGraph(RenderingContext& ctx)
 			cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, getRenderer().getSamplers().m_trilinearRepeat.get());
 			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
 
-			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
+			cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
 		reg, 0, \

+ 17 - 23
AnKi/Renderer/Utils/Drawer.cpp

@@ -51,7 +51,7 @@ void RenderableDrawer::setState(const RenderableDrawerArguments& args, CommandBu
 	cmdb.bindSampler(ANKI_MATERIAL_REGISTER_TILINEAR_REPEAT_SAMPLER, 0, args.m_sampler);
 	cmdb.bindSrv(ANKI_MATERIAL_REGISTER_GPU_SCENE, 0, GpuSceneBuffer::getSingleton().getBufferView());
 
-	cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
+	cmdb.bindSrv(ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) \
 	cmdb.bindSrv( \
 		reg, 0, \
@@ -62,7 +62,7 @@ void RenderableDrawer::setState(const RenderableDrawerArguments& args, CommandBu
 
 	cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESHLET_BOUNDING_VOLUMES, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
 	cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESHLET_GEOMETRY_DESCRIPTORS, 0, UnifiedGeometryBuffer::getSingleton().getBufferView());
-	if(args.m_mesh.m_meshletInstancesBuffer.isValid())
+	if(args.m_mesh.m_meshletInstancesBuffer)
 	{
 		cmdb.bindSrv(ANKI_MATERIAL_REGISTER_MESHLET_INSTANCES, 0, args.m_mesh.m_meshletInstancesBuffer);
 	}
@@ -71,12 +71,21 @@ void RenderableDrawer::setState(const RenderableDrawerArguments& args, CommandBu
 	cmdb.bindSrv(ANKI_MATERIAL_REGISTER_TRANSFORMS, 0, GpuSceneArrays::Transform::getSingleton().getBufferView());
 	cmdb.bindSrv(ANKI_MATERIAL_REGISTER_PARTICLE_EMITTERS, 0, GpuSceneArrays::ParticleEmitter::getSingleton().getBufferViewSafe());
 	cmdb.bindSampler(ANKI_MATERIAL_REGISTER_NEAREST_CLAMP_SAMPLER, 0, getRenderer().getSamplers().m_nearestNearestClamp.get());
+	if(args.m_legacy.m_perDrawBuffer)
+	{
+		cmdb.bindSrv(ANKI_MATERIAL_REGISTER_PER_DRAW, 0, args.m_legacy.m_perDrawBuffer);
+	}
 
-	if(args.m_mesh.m_firstMeshletBuffer.isValid())
+	if(args.m_mesh.m_firstMeshletBuffer)
 	{
 		cmdb.bindSrv(ANKI_MATERIAL_REGISTER_FIRST_MESHLET, 0, args.m_mesh.m_firstMeshletBuffer);
 	}
 
+	if(args.m_legacy.m_firstPerDrawBuffer)
+	{
+		cmdb.bindSrv(ANKI_MATERIAL_REGISTER_PER_DRAW_OFFSET, 0, args.m_legacy.m_firstPerDrawBuffer);
+	}
+
 	// Misc
 	cmdb.bindIndexBuffer(UnifiedGeometryBuffer::getSingleton().getBufferView(), IndexType::kU16);
 }
@@ -140,14 +149,16 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 				BufferView(args.m_mesh.m_indirectDrawArgs).incrementOffset(sizeof(DrawIndirectArgs) * bucketIdx).setRange(sizeof(DrawIndirectArgs));
 			cmdb.drawIndirect(PrimitiveTopology::kTriangles, indirectArgsBuffView);
 		}
-		else if(state.m_indexedDrawcall)
+		else
 		{
 			// Legacy indexed
 
 			const InstanceRange& instanceRange = args.m_legacy.m_bucketRenderableInstanceRanges[bucketIdx];
-			const U32 maxDrawCount = instanceRange.getInstanceCount();
 
-			cmdb.bindVertexBuffer(0, args.m_legacy.m_renderableInstancesBuffer, sizeof(GpuSceneRenderableInstance), VertexStepRate::kInstance);
+			const UVec4 consts(bucketIdx);
+			cmdb.setFastConstants(&consts, sizeof(consts));
+
+			const U32 maxDrawCount = instanceRange.getInstanceCount();
 
 			const BufferView indirectArgsBuffView = BufferView(args.m_legacy.m_drawIndexedIndirectArgsBuffer)
 														.incrementOffset(instanceRange.getFirstInstance() * sizeof(DrawIndexedIndirectArgs))
@@ -157,23 +168,6 @@ void RenderableDrawer::drawMdi(const RenderableDrawerArguments& args, CommandBuf
 			cmdb.drawIndexedIndirectCount(state.m_primitiveTopology, indirectArgsBuffView, sizeof(DrawIndexedIndirectArgs), mdiCountBuffView,
 										  maxDrawCount);
 		}
-		else
-		{
-			// Legacy non-indexed
-
-			const InstanceRange& instanceRange = args.m_legacy.m_bucketRenderableInstanceRanges[bucketIdx];
-			const U32 maxDrawCount = instanceRange.getInstanceCount();
-
-			cmdb.bindVertexBuffer(0, args.m_legacy.m_renderableInstancesBuffer, sizeof(GpuSceneRenderableInstance), VertexStepRate::kInstance);
-
-			// Yes, the DrawIndexedIndirectArgs is intentional
-			const BufferView indirectArgsBuffView = BufferView(args.m_legacy.m_drawIndexedIndirectArgsBuffer)
-														.incrementOffset(instanceRange.getFirstInstance() * sizeof(DrawIndexedIndirectArgs))
-														.setRange(instanceRange.getInstanceCount() * sizeof(DrawIndexedIndirectArgs));
-			const BufferView countBuffView =
-				BufferView(args.m_legacy.m_mdiDrawCountsBuffer).incrementOffset(sizeof(U32) * bucketIdx).setRange(sizeof(U32));
-			cmdb.drawIndirectCount(state.m_primitiveTopology, indirectArgsBuffView, sizeof(DrawIndexedIndirectArgs), countBuffView, maxDrawCount);
-		}
 	});
 
 #if ANKI_STATS_ENABLED

+ 4 - 2
AnKi/Renderer/Utils/Drawer.h

@@ -34,7 +34,8 @@ public:
 	{
 	public:
 		BufferView m_mdiDrawCountsBuffer;
-		BufferView m_renderableInstancesBuffer;
+		BufferView m_perDrawBuffer;
+		BufferView m_firstPerDrawBuffer;
 		BufferView m_drawIndexedIndirectArgsBuffer;
 
 		ConstWeakArray<InstanceRange> m_bucketRenderableInstanceRanges;
@@ -54,7 +55,8 @@ public:
 	void fill(const GpuVisibilityOutput& visOut)
 	{
 		m_legacy.m_mdiDrawCountsBuffer = visOut.m_legacy.m_mdiDrawCountsBuffer;
-		m_legacy.m_renderableInstancesBuffer = visOut.m_legacy.m_renderableInstancesBuffer;
+		m_legacy.m_perDrawBuffer = visOut.m_legacy.m_perDrawDataBuffer;
+		m_legacy.m_firstPerDrawBuffer = visOut.m_legacy.m_firstPerDrawBuffer;
 		m_legacy.m_drawIndexedIndirectArgsBuffer = visOut.m_legacy.m_drawIndexedIndirectArgsBuffer;
 		m_legacy.m_bucketRenderableInstanceRanges = visOut.m_legacy.m_bucketIndirectArgsRanges;
 		m_mesh.m_dispatchMeshIndirectArgsBuffer = visOut.m_mesh.m_dispatchMeshIndirectArgsBuffer;

+ 15 - 12
AnKi/Renderer/Utils/GpuVisibility.cpp

@@ -299,7 +299,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 			allocateStructuredBuffer<GpuVisibilityVisibleRenderableDesc>(maxLimits.m_maxVisibleLegacyRenderables);
 		m_persistentMemory.m_stage1.m_visibleMeshlets = allocateStructuredBuffer<GpuVisibilityVisibleMeshletDesc>(maxLimits.m_maxVisibleMeshlets);
 
-		m_persistentMemory.m_stage2Legacy.m_instanceRateRenderables = allocateStructuredBuffer<UVec4>(maxLimits.m_maxVisibleLegacyRenderables);
+		m_persistentMemory.m_stage2Legacy.m_perDraw = allocateStructuredBuffer<GpuScenePerDraw>(maxLimits.m_maxVisibleLegacyRenderables);
 		m_persistentMemory.m_stage2Legacy.m_drawIndexedIndirectArgs =
 			allocateStructuredBuffer<DrawIndexedIndirectArgs>(maxLimits.m_maxVisibleLegacyRenderables);
 
@@ -401,7 +401,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 		class
 		{
 		public:
-			BufferView m_instanceRateRenderables;
+			BufferView m_perDraw;
 			BufferView m_drawIndexedIndirectArgs;
 
 			BufferView m_mdiDrawCounts;
@@ -423,9 +423,9 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	{
 		if(in.m_limitMemory)
 		{
-			PtrSize newRange = sizeof(UVec4) * limits.m_maxVisibleLegacyRenderables;
-			ANKI_ASSERT(newRange <= m_persistentMemory.m_stage2Legacy.m_instanceRateRenderables.getRange());
-			stage2Mem.m_legacy.m_instanceRateRenderables = BufferView(m_persistentMemory.m_stage2Legacy.m_instanceRateRenderables).setRange(newRange);
+			PtrSize newRange = sizeof(GpuScenePerDraw) * limits.m_maxVisibleLegacyRenderables;
+			ANKI_ASSERT(newRange <= m_persistentMemory.m_stage2Legacy.m_perDraw.getRange());
+			stage2Mem.m_legacy.m_perDraw = BufferView(m_persistentMemory.m_stage2Legacy.m_perDraw).setRange(newRange);
 
 			newRange = sizeof(DrawIndexedIndirectArgs) * limits.m_maxVisibleLegacyRenderables;
 			ANKI_ASSERT(newRange <= m_persistentMemory.m_stage2Legacy.m_drawIndexedIndirectArgs.getRange());
@@ -433,7 +433,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 		}
 		else
 		{
-			stage2Mem.m_legacy.m_instanceRateRenderables = allocateStructuredBuffer<UVec4>(limits.m_maxVisibleLegacyRenderables);
+			stage2Mem.m_legacy.m_perDraw = allocateStructuredBuffer<GpuScenePerDraw>(limits.m_maxVisibleLegacyRenderables);
 			stage2Mem.m_legacy.m_drawIndexedIndirectArgs = allocateStructuredBuffer<DrawIndexedIndirectArgs>(limits.m_maxVisibleLegacyRenderables);
 		}
 
@@ -514,7 +514,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	}
 
 	// Setup output
-	out.m_legacy.m_renderableInstancesBuffer = stage2Mem.m_legacy.m_instanceRateRenderables;
+	out.m_legacy.m_perDrawDataBuffer = stage2Mem.m_legacy.m_perDraw;
 	out.m_legacy.m_mdiDrawCountsBuffer = stage2Mem.m_legacy.m_mdiDrawCounts;
 	out.m_legacy.m_drawIndexedIndirectArgsBuffer = stage2Mem.m_legacy.m_drawIndexedIndirectArgs;
 	out.m_mesh.m_dispatchMeshIndirectArgsBuffer = stage2Mem.m_meshlet.m_dispatchMeshIndirectArgs;
@@ -526,6 +526,10 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 	{
 		out.m_mesh.m_firstMeshletBuffer = stage1Mem.m_meshletPrefixSums;
 	}
+	if(bLegacyRendering)
+	{
+		out.m_legacy.m_firstPerDrawBuffer = stage1Mem.m_renderablePrefixSums;
+	}
 	if(bStoreMeshletsFailedHzb)
 	{
 		out.m_stage1And2Mem.m_meshletsFailedHzb = stage2Mem.m_meshlet.m_meshletsFailedHzb;
@@ -579,7 +583,7 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 			ANKI_ZERO_PART(stage1Mem.m_visibleAabbIndices, true, sizeof(U32))
 			ANKI_ZERO(stage1Mem.m_hash, true)
 
-			ANKI_ZERO(stage2Mem.m_legacy.m_instanceRateRenderables, false)
+			ANKI_ZERO(stage2Mem.m_legacy.m_perDraw, false)
 			ANKI_ZERO(stage2Mem.m_legacy.m_drawIndexedIndirectArgs, true)
 			ANKI_ZERO(stage2Mem.m_legacy.m_mdiDrawCounts, true)
 			ANKI_ZERO(stage2Mem.m_meshlet.m_indirectDrawArgs, true)
@@ -757,13 +761,12 @@ void GpuVisibility::populateRenderGraphInternal(Bool distanceBased, BaseGpuVisib
 					firstDrawIndirectArgAndCount[ibucket].y() = out.m_legacy.m_bucketIndirectArgsRanges[ibucket].m_instanceCount;
 				}
 
-				cmdb.bindUav(0, 0, stage2Mem.m_legacy.m_instanceRateRenderables);
+				cmdb.bindUav(0, 0, stage2Mem.m_legacy.m_perDraw);
 				cmdb.bindUav(1, 0, stage2Mem.m_legacy.m_drawIndexedIndirectArgs);
-				cmdb.bindUav(2, 0, stage2Mem.m_legacy.m_drawIndexedIndirectArgs);
 
-				cmdb.bindUav(3, 0, stage2Mem.m_legacy.m_mdiDrawCounts);
+				cmdb.bindUav(2, 0, stage2Mem.m_legacy.m_mdiDrawCounts);
 
-				cmdb.bindUav(4, 0, m_outOfMemoryReadbackBuffer);
+				cmdb.bindUav(3, 0, m_outOfMemoryReadbackBuffer);
 
 				cmdb.dispatchComputeIndirect(
 					BufferView(stage1Mem.m_gpuVisIndirectDispatchArgs)

+ 41 - 55
AnKi/Renderer/Utils/GpuVisibility.h

@@ -11,10 +11,6 @@
 
 namespace anki {
 
-/// @addtogroup renderer
-/// @{
-
-/// @memberof GpuVisibility
 class InstanceRange
 {
 	friend class GpuVisibility;
@@ -42,7 +38,6 @@ private:
 	U32 m_instanceCount = 0;
 };
 
-/// @memberof GpuVisibility
 class BaseGpuVisibilityInput
 {
 public:
@@ -54,13 +49,12 @@ public:
 
 	RenderGraphBuilder* m_rgraph = nullptr;
 
-	Bool m_gatherAabbIndices = false; ///< For debug draw.
-	Bool m_hashVisibles = false; ///< Create a hash for the visible renderables.
+	Bool m_gatherAabbIndices = false; // For debug draw.
+	Bool m_hashVisibles = false; // Create a hash for the visible renderables.
 
-	Bool m_limitMemory = false; ///< Use less memory but you pay some cost scheduling the work.
+	Bool m_limitMemory = false; // Use less memory but you pay some cost scheduling the work.
 };
 
-/// @memberof GpuVisibility
 class FrustumGpuVisibilityInput : public BaseGpuVisibilityInput
 {
 public:
@@ -69,12 +63,11 @@ public:
 	/// The size of the viewport the visibility results will be used on. Used to kill objects that don't touch the sampling positions.
 	UVec2 m_viewportSize;
 
-	const RenderTargetHandle* m_hzbRt = nullptr; ///< Optional.
+	const RenderTargetHandle* m_hzbRt = nullptr; // Optional.
 
-	Bool m_twoPhaseOcclusionCulling = false; ///< If it's false then it's only a single phase. Only applies when meshlet rendering is enabled.
+	Bool m_twoPhaseOcclusionCulling = false; // If it's false then it's only a single phase. Only applies when meshlet rendering is enabled.
 };
 
-/// @memberof GpuVisibility
 class DistanceGpuVisibilityInput : public BaseGpuVisibilityInput
 {
 public:
@@ -82,39 +75,41 @@ public:
 	F32 m_testRadius = 1.0f;
 };
 
-/// @memberof GpuVisibility
 class GpuVisibilityOutput
 {
 	friend class GpuVisibility;
 
 public:
-	BufferHandle m_dependency; ///< Just expose one handle for depedencies. No need to track all buffers. Wait on it using indirect draw usage.
+	BufferHandle m_dependency; // Just expose one handle for dependencies. No need to track all buffers. Wait on it using indirect draw usage.
 
 	class
 	{
 	public:
-		BufferView m_renderableInstancesBuffer; ///< An array of GpuSceneRenderableInstance.
-		BufferView m_mdiDrawCountsBuffer; ///< An array of U32, one for each render state bucket (even those that use task/mesh flow).
-		BufferView m_drawIndexedIndirectArgsBuffer; ///< Array of DrawIndexedIndirectArgs or DrawIndirectArgs.
+		BufferView m_perDrawDataBuffer; // An array of GpuScenePerDraw.
+		BufferView m_mdiDrawCountsBuffer; // An array of U32, one for each render state bucket (even those that use task/mesh flow).
+		BufferView m_drawIndexedIndirectArgsBuffer; // Array of DrawIndexedIndirectArgs or DrawIndirectArgs.
+
+		// One element per bucket. Each element is the index into m_perDrawDataBuffer at which that bucket's per-draw data starts
+		BufferView m_firstPerDrawBuffer;
 
-		/// Defines the element sub-ranges in the m_drawIndexedIndirectArgsBuffer per render state bucket.
+		// Defines the element sub-ranges in the m_drawIndexedIndirectArgsBuffer per render state bucket.
 		WeakArray<InstanceRange> m_bucketIndirectArgsRanges;
-	} m_legacy; ///< Legacy vertex shading.
+	} m_legacy; // Legacy vertex shading.
 
 	class
 	{
 	public:
-		BufferView m_dispatchMeshIndirectArgsBuffer; ///< H/W meshlet rendering array of DispatchIndirectArgs, one for each render state bucket.
-		BufferView m_drawIndirectArgs; ///< S/W meshlet rendering array of DrawIndirectArgs, one for each state bucket.
+		BufferView m_dispatchMeshIndirectArgsBuffer; // H/W meshlet rendering array of DispatchIndirectArgs, one for each render state bucket.
+		BufferView m_drawIndirectArgs; // S/W meshlet rendering array of DrawIndirectArgs, one for each state bucket.
 
 		BufferView m_meshletInstancesBuffer;
 
-		BufferView m_firstMeshletBuffer; ///< For H/W meshlet rendering. Points to the first meshlet in the m_meshletInstancesBuffer. One per bucket.
-	} m_mesh; ///< S/W or H/W meshlet rendering.
+		BufferView m_firstMeshletBuffer; // For H/W meshlet rendering. Points to the first meshlet in the m_meshletInstancesBuffer. One per bucket.
+	} m_mesh; // S/W or H/W meshlet rendering.
 
-	BufferView m_visibleAaabbIndicesBuffer; ///< [Optional] Indices to the AABB buffer. The 1st element is the count.
+	BufferView m_visibleAaabbIndicesBuffer; // [Optional] Indices to the AABB buffer. The 1st element is the count.
 
-	BufferView m_visiblesHashBuffer; ///< [Optional] A hash of the visible objects. Used to conditionaly not perform shadow randering.
+	BufferView m_visiblesHashBuffer; // [Optional] A hash of the visible objects. Used to conditionally skip shadow rendering.
 
 	Bool containsDrawcalls() const
 	{
@@ -129,7 +124,7 @@ private:
 		BufferView m_counters;
 		BufferView m_meshletPrefixSums;
 		BufferView m_gpuVisIndirectDispatchArgs;
-	} m_stage1And2Mem; ///< Output of the 2nd (or 1st) stage that will be used in the 3rd
+	} m_stage1And2Mem; // Output of the 2nd (or 1st) stage that will be used in the 3rd
 
 	class
 	{
@@ -137,7 +132,7 @@ private:
 		BufferView m_indirectDrawArgs;
 		BufferView m_dispatchMeshIndirectArgs;
 		BufferView m_meshletInstances;
-	} m_stage3Mem; ///< Output of the 3rd stage.
+	} m_stage3Mem; // Output of the 3rd stage.
 };
 
 /// Performs GPU visibility for some pass.
@@ -188,7 +183,7 @@ private:
 		class
 		{
 		public:
-			BufferView m_instanceRateRenderables;
+			BufferView m_perDraw;
 			BufferView m_drawIndexedIndirectArgs;
 		} m_stage2Legacy;
 
@@ -216,7 +211,6 @@ private:
 	void populateRenderGraphInternal(Bool distanceBased, BaseGpuVisibilityInput& in, GpuVisibilityOutput& out);
 };
 
-/// @memberof GpuVisibilityNonRenderables
 class GpuVisibilityNonRenderablesInput
 {
 public:
@@ -225,19 +219,18 @@ public:
 	Mat4 m_viewProjectionMat;
 	RenderGraphBuilder* m_rgraph = nullptr;
 
-	const RenderTargetHandle* m_hzbRt = nullptr; ///< Optional.
-	BufferView m_cpuFeedbackBuffer; ///< Optional.
+	const RenderTargetHandle* m_hzbRt = nullptr; // Optional.
+	BufferView m_cpuFeedbackBuffer; // Optional.
 };
 
-/// @memberof GpuVisibilityNonRenderables
 class GpuVisibilityNonRenderablesOutput
 {
 public:
-	BufferHandle m_visiblesBufferHandle; ///< Buffer handle holding the visible objects. Used for tracking. No need to track all buffers.
+	BufferHandle m_visiblesBufferHandle; // Buffer handle holding the visible objects. Used for tracking. No need to track all buffers.
 	BufferView m_visiblesBuffer;
 };
 
-/// GPU visibility of lights, probes etc.
+// GPU visibility of lights, probes etc.
 class GpuVisibilityNonRenderables : public RendererObject
 {
 public:
@@ -252,12 +245,11 @@ private:
 	static constexpr U32 kInitialCounterArraySize = 32;
 
 	BufferHandle m_counterBufferZeroingHandle;
-	BufferPtr m_counterBuffer; ///< A buffer containing multiple counters for atomic operations.
+	BufferPtr m_counterBuffer; // A buffer containing multiple counters for atomic operations.
 	U64 m_lastFrameIdx = kMaxU64;
 	U32 m_counterBufferOffset = 0;
 };
 
-/// @memberof GpuVisibilityAccelerationStructures
 class GpuVisibilityAccelerationStructuresInput
 {
 public:
@@ -284,19 +276,18 @@ public:
 	}
 };
 
-/// @memberof GpuVisibilityAccelerationStructures
 class GpuVisibilityAccelerationStructuresOutput
 {
 public:
-	BufferHandle m_dependency; ///< Some handle to track dependencies. No need to track every buffer.
+	BufferHandle m_dependency; // Some handle to track dependencies. No need to track every buffer.
 
-	BufferView m_instancesBuffer; ///< Points to AccelerationStructureBuildRangeInfo::m_primitiveCount number of AccelerationStructureInstance.
-	BufferView m_renderablesBuffer; ///< AccelerationStructureBuildRangeInfo::m_primitiveCount + 1 number of indices to renderables.
+	BufferView m_instancesBuffer; // Points to AccelerationStructureBuildRangeInfo::m_primitiveCount number of AccelerationStructureInstance.
+	BufferView m_renderablesBuffer; // AccelerationStructureBuildRangeInfo::m_primitiveCount + 1 number of indices to renderables.
 
-	BufferView m_buildSbtIndirectArgsBuffer; ///< The DispatchIndirectArgs for the SBT dispatches.
+	BufferView m_buildSbtIndirectArgsBuffer; // The DispatchIndirectArgs for the SBT dispatches.
 };
 
-/// Performs visibility to gather bottom-level acceleration structures in a buffer that can be used to build a TLAS.
+// Performs visibility to gather bottom-level acceleration structures in a buffer that can be used to build a TLAS.
 class GpuVisibilityAccelerationStructures : public RendererObject
 {
 public:
@@ -309,14 +300,13 @@ private:
 	ShaderProgramPtr m_visibilityGrProg;
 	ShaderProgramPtr m_zeroRemainingInstancesGrProg;
 
-	BufferPtr m_counterBuffer; ///< A buffer containing multiple counters for atomic operations.
+	BufferPtr m_counterBuffer; // A buffer containing multiple counters for atomic operations.
 
 #if ANKI_ASSERTIONS_ENABLED
 	U64 m_lastFrameIdx = kMaxU64;
 #endif
 };
 
-/// @memberof GpuVisibilityLocalLights
 class GpuVisibilityLocalLightsInput
 {
 public:
@@ -326,31 +316,28 @@ public:
 	Vec3 m_cameraPosition;
 	Vec3 m_lookDirection;
 
-	U32 m_lightIndexListSize = 0; ///< The number of light indices to store.
+	U32 m_lightIndexListSize = 0; // The number of light indices to store.
 
 	CString m_passesName = "GpuVisibilityLocalLights";
 
 	RenderGraphBuilder* m_rgraph = nullptr;
 };
 
-/// @memberof GpuVisibilityLocalLights
 class GpuVisibilityLocalLightsOutput
 {
 public:
-	BufferHandle m_dependency; ///< Some handle to track dependencies. No need to track every buffer.
+	BufferHandle m_dependency; // Some handle to track dependencies. No need to track every buffer.
 
-	BufferView m_lightIndexOffsetsPerCellBuffer; ///< One offset to the m_lightIndexBuffer. One offset per cell.
-	BufferView m_lightIndexCountsPerCellBuffer; ///< Number of lights per cell.
-	BufferView m_lightIndexListBuffer; ///< Contains indexes to the GPU scene lights array.
+	BufferView m_lightIndexOffsetsPerCellBuffer; // One offset to the m_lightIndexListBuffer. One offset per cell.
+	BufferView m_lightIndexCountsPerCellBuffer; // Number of lights per cell.
+	BufferView m_lightIndexListBuffer; // Contains indexes to the GPU scene lights array.
 
-	/// @{
-	/// The volume of the grid.
+	// The volume of the grid.
 	Vec3 m_lightGridMin;
 	Vec3 m_lightGridMax;
-	/// @}
 };
 
-/// Gathers the local lights around the camera to a grid.
+// Gathers the local lights around the camera to a grid.
 class GpuVisibilityLocalLights : public RendererObject
 {
 public:
@@ -368,6 +355,5 @@ private:
 	ShaderProgramPtr m_prefixSumGrProg;
 	ShaderProgramPtr m_fillGrProg;
 };
-/// @}
 
 } // end namespace anki

+ 2 - 1
AnKi/Resource/MeshResource.cpp

@@ -312,7 +312,8 @@ Error MeshResource::loadAsync(MeshBinaryLoader& loader) const
 
 				outMeshletGeom.m_firstPrimitive =
 					lod.m_meshletIndices.getOffset() / getFormatInfo(kMeshletPrimitiveFormat).m_texelSize + inMeshlet.m_firstPrimitive;
-				outMeshletGeom.m_primitiveCount_R16_Uint_vertexCount_R16_Uint = (inMeshlet.m_primitiveCount << 16u) | inMeshlet.m_vertexCount;
+				outMeshletGeom.m_primitiveCount = inMeshlet.m_primitiveCount;
+				outMeshletGeom.m_vertexCount = inMeshlet.m_vertexCount;
 				outMeshletGeom.m_positionTranslation = m_positionsTranslation;
 				outMeshletGeom.m_positionScale = m_positionsScale;
 

+ 0 - 2
AnKi/Resource/ParticleEmitterResource2.h

@@ -16,8 +16,6 @@ class ShaderProgramResourceVariantInitInfo;
 class ParticleEmitterResourceProperty
 {
 public:
-	ParticleEmitterResourceProperty() = default;
-
 	ParticleEmitterResourceProperty(const ParticleEmitterResourceProperty&) = delete; // Non-copyable
 
 	ParticleEmitterResourceProperty& operator=(const ParticleEmitterResourceProperty&) = delete; // Non-copyable

+ 0 - 1
AnKi/Scene/Components/MaterialComponent.cpp

@@ -281,7 +281,6 @@ void MaterialComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 			RenderStateInfo state;
 			state.m_primitiveTopology = PrimitiveTopology::kTriangles;
-			state.m_indexedDrawcall = true;
 			state.m_program = mvariant.getShaderProgram();
 
 			U32 firstIndex, indexCount, firstMeshlet, meshletCount;

+ 0 - 1
AnKi/Scene/Components/ParticleEmitterComponent.cpp

@@ -316,7 +316,6 @@ void ParticleEmitterComponent::loadParticleEmitterResource(CString filename)
 		RenderStateInfo state;
 		state.m_program = prog;
 		state.m_primitiveTopology = PrimitiveTopology::kTriangles;
-		state.m_indexedDrawcall = false;
 		m_renderStateBuckets[t] = RenderStateBucketContainer::getSingleton().addUser(state, t, 0);
 	}
 }

+ 1 - 1
AnKi/Scene/Components/SceneComponent.h

@@ -177,7 +177,7 @@ protected:
 		}
 		else
 		{
-			Bool found = false;
+			[[maybe_unused]] Bool found = false;
 			for(auto it = arr.getBegin(); it != arr.getEnd(); ++it)
 			{
 				if(*it == other)

+ 1 - 3
AnKi/Scene/RenderStateBucket.cpp

@@ -24,10 +24,9 @@ RenderStateBucketContainer::~RenderStateBucketContainer()
 RenderStateBucketIndex RenderStateBucketContainer::addUser(const RenderStateInfo& state, RenderingTechnique technique, U32 lod0MeshletCount)
 {
 	// Compute state hash
-	Array<U64, 3> toHash;
+	Array<U64, 2> toHash;
 	toHash[0] = state.m_program->getUuid();
 	toHash[1] = U64(state.m_primitiveTopology);
-	toHash[2] = state.m_indexedDrawcall;
 	const U64 hash = computeHash(toHash.getBegin(), toHash.getSizeInBytes());
 
 	SceneDynamicArray<ExtendedBucket>& buckets = m_buckets[technique];
@@ -86,7 +85,6 @@ RenderStateBucketIndex RenderStateBucketContainer::addUser(const RenderStateInfo
 	// Bucket not found, create one
 	ExtendedBucket& newBucket = *buckets.emplaceBack();
 	newBucket.m_hash = hash;
-	newBucket.m_indexedDrawcall = state.m_indexedDrawcall;
 	newBucket.m_primitiveTopology = state.m_primitiveTopology;
 	newBucket.m_program = state.m_program;
 	newBucket.m_userCount = 1;

+ 0 - 1
AnKi/Scene/RenderStateBucket.h

@@ -20,7 +20,6 @@ class RenderStateInfo
 public:
 	ShaderProgramPtr m_program;
 	PrimitiveTopology m_primitiveTopology = PrimitiveTopology::kTriangles;
-	Bool m_indexedDrawcall = true;
 };
 
 class RenderStateBucketIndex

+ 4 - 5
AnKi/Shaders/ForwardShadingFog.ankiprog

@@ -18,7 +18,6 @@
 
 struct VertIn
 {
-	UVec4 m_gpuSceneRenderable : MISC0;
 	U32 m_svVertexId : SV_VERTEXID;
 };
 
@@ -34,9 +33,9 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
-	const GpuSceneMeshLod mesh = g_meshLods[renderable.m_meshLodIndex];
-	const Mat3x4 worldTransform = g_transforms[renderable.m_worldTransformsIndex];
+	const GpuScenePerDraw perDraw = getGpuScenePerDraw();
+	const GpuSceneMeshLod mesh = g_meshLods[perDraw.m_meshLodIndex];
+	const Mat3x4 worldTransform = g_transforms[perDraw.m_worldTransformsIndex];
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);
 
 	const Vec3 worldPos = mul(worldTransform, Vec4(vertex.m_position, 1.0));
@@ -45,7 +44,7 @@ VertOut main(VertIn input)
 	const Vec3 viewPos = mul(g_globalConstants.m_viewTransform, Vec4(worldPos, 1.0));
 	output.m_zVSpace = viewPos.z;
 
-	output.m_constantsOffset = renderable.m_constantsOffset;
+	output.m_constantsOffset = perDraw.m_constantsOffset;
 
 	return output;
 }

+ 3 - 3
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -70,7 +70,7 @@ VertOut main(VertIn input)
 	const UnpackedMeshVertex vertex = loadVertex(meshlet, input.m_svVertexId, false);
 	const U32 worldTransformsIndex = instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit >> 7u;
 #	else // SW_MESHLETS
-	const GpuSceneRenderableInstance instance = unpackGpuSceneRenderableVertex(input.m_instanceData);
+	const GpuScenePerDraw instance = getGpuScenePerDraw();
 	const GpuSceneMeshLod mesh = g_meshLods[instance.m_meshLodIndex];
 	const U32 worldTransformsIndex = instance.m_worldTransformsIndex;
 	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);
@@ -106,8 +106,8 @@ main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, out vertices
 	const U32 worldTransformsIndex = instance.m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit >> 7u;
 
 	const MeshletGeometryDescriptor meshlet = g_meshletGeometryDescriptors[instance.m_meshletGeometryDescriptorIndex];
-	const U32 primCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint >> 16u;
-	const U32 vertCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint & 0xFFFFu;
+	const U32 primCount = meshlet.m_primitiveCount;
+	const U32 vertCount = meshlet.m_vertexCount;
 
 	SetMeshOutputCounts(vertCount, primCount);
 

+ 6 - 8
AnKi/Shaders/ForwardShadingParticles.ankiprog

@@ -14,8 +14,8 @@
 
 struct VertIn
 {
-	UVec4 m_instanceData : MISC0;
 	U32 m_svVertexId : SV_VERTEXID;
+	U32 m_svInstanceId : SV_INSTANCEID;
 };
 
 struct VertOut
@@ -37,13 +37,11 @@ struct VertOut
 #if ANKI_VERTEX_SHADER
 VertOut main(VertIn input)
 {
-	const GpuSceneRenderableInstance instance = unpackGpuSceneRenderableVertex(input.m_instanceData);
-	const GpuSceneParticleEmitter particles = g_particleEmitters[instance.m_boneTransformsOffsetOrParticleEmitterIndex];
+	const GpuScenePerDraw instance = getGpuScenePerDraw();
+	const GpuSceneParticleEmitter particles = g_particleEmitters[instance.m_particleEmitterIndex];
 	const GpuSceneMeshLod meshLod = g_meshLods[instance.m_meshLodIndex];
 
-	const U32 particleId = input.m_svVertexId / meshLod.m_indexCount;
-
-	const U32 vertexId = g_unifiedGeom.Load<U16>((meshLod.m_firstIndex + input.m_svVertexId % meshLod.m_indexCount) * sizeof(U16));
+	const U32 particleId = input.m_svInstanceId;
 
 	U32 idx = particles.m_vertexOffsets[(U32)VertexStreamId::kParticlePosition] + particleId * sizeof(Vec3);
 	const Vec3 particlePos = g_gpuScene.Load<Vec3>(idx);
@@ -54,10 +52,10 @@ VertOut main(VertIn input)
 
 	VertOut output;
 
-	output.m_uv = g_unifiedGeom_R32G32_Sfloat[meshLod.m_vertexOffsets[(U32)VertexStreamId::kUv] + vertexId];
+	output.m_uv = g_unifiedGeom_R32G32_Sfloat[meshLod.m_vertexOffsets[(U32)VertexStreamId::kUv] + input.m_svVertexId];
 
 	const Vec3 localPos =
-		g_unifiedGeom_R16G16B16A16_Unorm[meshLod.m_vertexOffsets[(U32)VertexStreamId::kPosition] + vertexId] * meshLod.m_positionScale
+		g_unifiedGeom_R16G16B16A16_Unorm[meshLod.m_vertexOffsets[(U32)VertexStreamId::kPosition] + input.m_svVertexId] * meshLod.m_positionScale
 		+ meshLod.m_positionTranslation;
 
 	// Apply the particle scale, rotate the mesh to face the camera (billboard) and finally apply the particle position

+ 11 - 11
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -256,13 +256,13 @@ VertOut main(VertIn input)
 #		endif
 
 #	else // SW_MESHLETS
-	const GpuSceneRenderableInstance instance = unpackGpuSceneRenderableVertex(input.m_instanceData);
+	const GpuScenePerDraw instance = getGpuScenePerDraw();
 	const GpuSceneMeshLod mesh = g_meshLods[instance.m_meshLodIndex];
 	UnpackedMeshVertex vert = loadVertex(mesh, input.m_svVertexId, ANKI_BONES);
 
 	const U32 constantsOffset = instance.m_constantsOffset;
 	const U32 worldTransformsIndex = instance.m_worldTransformsIndex;
-	const U32 boneTransformsOffset = instance.m_boneTransformsOffsetOrParticleEmitterIndex;
+	const U32 boneTransformsOffset = instance.m_boneTransformsOffset;
 #	endif // SW_MESHLETS
 	ANKI_MAYBE_UNUSED(boneTransformsOffset);
 
@@ -325,8 +325,8 @@ main(U32 svGroupId : SV_GROUPID, U32 svGroupIndex : SV_GROUPINDEX, out vertices
 	ANKI_MAYBE_UNUSED(boneTransformsOffset);
 
 	const MeshletGeometryDescriptor meshlet = g_meshletGeometryDescriptors[instance.m_meshletGeometryDescriptorIndex];
-	const U32 primCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint >> 16u;
-	const U32 vertCount = meshlet.m_primitiveCount_R16_Uint_vertexCount_R16_Uint & 0xFFFFu;
+	const U32 primCount = meshlet.m_primitiveCount;
+	const U32 vertCount = meshlet.m_vertexCount & 0xFFFFu;
 
 	SetMeshOutputCounts(vertCount, primCount);
 
@@ -598,7 +598,7 @@ GBufferPixelOut main(
 #	if ANKI_ANY_HIT_SHADER
 
 #		if REALLY_ALPHA_TEST
-ANKI_SHADER_RECORD_CONSTANTS(GpuSceneRenderableInstance, g_gpuSceneRenderable)
+ANKI_SHADER_RECORD_CONSTANTS(GpuScenePerDraw, g_record)
 #		endif
 
 [shader("anyhit")] void main(inout RtShadowsRayPayload payload, in Barycentrics barycentrics)
@@ -611,7 +611,7 @@ ANKI_SHADER_RECORD_CONSTANTS(GpuSceneRenderableInstance, g_gpuSceneRenderable)
 
 	const Vec3 bary = Vec3(1.0f - barycentrics.m_value.x - barycentrics.m_value.y, barycentrics.m_value.x, barycentrics.m_value.y);
 
-	const GpuSceneMeshLod mesh = g_meshLods[g_gpuSceneRenderable.m_meshLodIndex];
+	const GpuSceneMeshLod mesh = g_meshLods[g_record.m_meshLodIndex];
 
 	const U16Vec3 indices = g_unifiedGeom.Load<U16Vec3>(mesh.m_firstIndex * sizeof(U16) + PrimitiveIndex() * sizeof(U16Vec3));
 	const UnpackedMeshVertex vert0 = loadVertex(mesh, indices[0], false);
@@ -620,7 +620,7 @@ ANKI_SHADER_RECORD_CONSTANTS(GpuSceneRenderableInstance, g_gpuSceneRenderable)
 
 	const Vec2 uv = vert0.m_uv * bary.x + vert1.m_uv * bary.y + vert2.m_uv * bary.z;
 
-	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, g_gpuSceneRenderable.m_constantsOffset);
+	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, g_record.m_constantsOffset);
 	const HVec4 diffColorA =
 		getBindlessTexture2DVec4(localConstants.m_diffuseTex).SampleLevel(g_globalSampler, uv, 0.0) * localConstants.m_diffuseScale;
 
@@ -658,11 +658,11 @@ ANKI_SHADER_RECORD_CONSTANTS(GpuSceneRenderableInstance, g_gpuSceneRenderable)
 #	define USE_POSITION_FETCH_NORMALS ANKI_GR_BACKEND_VULKAN
 
 #	if ANKI_CLOSEST_HIT_SHADER
-ANKI_SHADER_RECORD_CONSTANTS(GpuSceneRenderableInstance, g_gpuSceneRenderable)
+ANKI_SHADER_RECORD_CONSTANTS(GpuScenePerDraw, g_record)
 
 [shader("closesthit")] void main(inout RtMaterialFetchRayPayload payload : SV_RayPayload, in Barycentrics barycentrics : SV_IntersectionAttributes)
 {
-	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, g_gpuSceneRenderable.m_constantsOffset);
+	const AnKiLocalConstants localConstants = loadAnKiLocalConstants(g_gpuScene, g_record.m_constantsOffset);
 
 	const Bool skipVerts = USE_POSITION_FETCH_NORMALS && payload.m_textureLod > 100.0;
 
@@ -672,7 +672,7 @@ ANKI_SHADER_RECORD_CONSTANTS(GpuSceneRenderableInstance, g_gpuSceneRenderable)
 	{
 		const Vec3 bary = Vec3(1.0f - barycentrics.m_value.x - barycentrics.m_value.y, barycentrics.m_value.x, barycentrics.m_value.y);
 
-		const GpuSceneMeshLod mesh = g_meshLods[g_gpuSceneRenderable.m_meshLodIndex];
+		const GpuSceneMeshLod mesh = g_meshLods[g_record.m_meshLodIndex];
 
 		const U16Vec3 indices = g_unifiedGeom.Load<U16Vec3>(mesh.m_firstIndex * sizeof(U16) + PrimitiveIndex() * sizeof(U16Vec3));
 		const UnpackedMeshVertex vert0 = loadVertex(mesh, indices[0], false);
@@ -732,7 +732,7 @@ ANKI_SHADER_RECORD_CONSTANTS(GpuSceneRenderableInstance, g_gpuSceneRenderable)
 	payload.m_emission = emission;
 
 	// Normal
-	const Mat3x4 worldTransform = g_transforms[g_gpuSceneRenderable.m_worldTransformsIndex];
+	const Mat3x4 worldTransform = g_transforms[g_record.m_worldTransformsIndex];
 	const Vec3 normal = normalize(mul(worldTransform, Vec4(vertNormal, 0.0)));
 	payload.m_worldNormal = normal;
 

+ 2 - 3
AnKi/Shaders/GBufferGpuParticles.ankiprog

@@ -21,7 +21,6 @@
 struct VertIn
 {
 	U32 m_svVertexId : SV_VERTEXID;
-	UVec4 m_gpuSceneRenderable : MISC0;
 };
 
 struct VertOut
@@ -37,8 +36,8 @@ VertOut main(VertIn input)
 {
 	VertOut output;
 
-	const GpuSceneRenderableInstance renderable = unpackGpuSceneRenderableVertex(input.m_gpuSceneRenderable);
-	const GpuSceneParticleEmitter particles = g_particleEmitters[renderable.m_boneTransformsOffsetOrParticleEmitterIndex];
+	const GpuScenePerDraw renderable = getGpuScenePerDraw();
+	const GpuSceneParticleEmitter particles = g_particleEmitters[renderable.m_particleEmitterIndex];
 
 	// Read vertex
 	const U32 particleId = input.m_svVertexId;

+ 8 - 6
AnKi/Shaders/GpuVisibilityStage1.ankiprog

@@ -190,8 +190,10 @@ Bool isVisible(GpuSceneRenderableBoundingVolume bvolume)
 		{
 #if GATHER_MESHLETS
 			GpuVisibilityVisibleMeshletDesc desc;
-			desc.m_renderableIndex_30bit_renderStageBucket_12bit = (renderableIdx << 12u) | renderStateBucket;
-			desc.m_lod_2bit_meshletIndex_30bit = lod << 30u;
+			desc.m_renderableIndex = renderableIdx;
+			desc.m_renderStateBucket = renderStateBucket;
+			desc.m_lod = lod;
+			desc.m_meshletIndex = 0;
 
 			// X dimension will be fixed later
 			U32 firstMeshletIndex;
@@ -209,7 +211,7 @@ Bool isVisible(GpuSceneRenderableBoundingVolume bvolume)
 				for(U32 i = 0; i < meshLod.m_meshletCount; ++i)
 				{
 					SBUFF(g_visibleMeshlets, firstMeshletIndex + i) = desc;
-					++desc.m_lod_2bit_meshletIndex_30bit;
+					++desc.m_meshletIndex;
 				}
 
 				// Add to the next bucket
@@ -237,9 +239,9 @@ Bool isVisible(GpuSceneRenderableBoundingVolume bvolume)
 
 				// Store the renderable
 				GpuVisibilityVisibleRenderableDesc visRenderable;
-				visRenderable.m_lod_2bit_renderableIndex_20bit_renderStageBucket_10bit = lod << 30u;
-				visRenderable.m_lod_2bit_renderableIndex_20bit_renderStageBucket_10bit |= renderableIdx << 10u;
-				visRenderable.m_lod_2bit_renderableIndex_20bit_renderStageBucket_10bit |= renderStateBucket;
+				visRenderable.m_lod = lod;
+				visRenderable.m_renderableIndex = renderableIdx;
+				visRenderable.m_renderStateBucket = renderStateBucket;
 				SBUFF(g_visibleRenderables, firstInstance) = visRenderable;
 
 				// Add to the next bucket

+ 31 - 41
AnKi/Shaders/GpuVisibilityStage2And3.ankiprog

@@ -46,14 +46,13 @@ StructuredBuffer<U32> g_renderablePrefixSums : register(t5);
 StructuredBuffer<UVec2> g_firstDrawIndirectArgAndCount : register(t6);
 
 // These 2 have the same size
-RWStructuredBuffer<UVec4> g_instanceRateRenderables : register(u0);
+RWStructuredBuffer<GpuScenePerDraw> g_perDraw : register(u0);
 RWStructuredBuffer<DrawIndexedIndirectArgs> g_drawIndexedIndirectArgs : register(u1);
-RWStructuredBuffer<DrawIndirectArgsWithPadding> g_drawIndirectArgs : register(u2); // This points to the same buffer as the above
 
 // The MDI counts. One for each render state bucket
-RWStructuredBuffer<U32> g_mdiDrawCounts : register(u3);
+RWStructuredBuffer<U32> g_mdiDrawCounts : register(u2);
 
-RWStructuredBuffer<U32> g_outOfMemoryBuffer : register(u4);
+RWStructuredBuffer<U32> g_outOfMemoryBuffer : register(u3);
 
 [numthreads(NUMTHREADS, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
 {
@@ -64,9 +63,9 @@ RWStructuredBuffer<U32> g_outOfMemoryBuffer : register(u4);
 
 	const GpuVisibilityVisibleRenderableDesc desc = SBUFF(g_visibleRenderables, svDispatchThreadId.x);
 
-	const U32 renderStateBucket = desc.m_lod_2bit_renderableIndex_20bit_renderStageBucket_10bit & ((1u << 10u) - 1u);
-	const U32 renderableIdx = (desc.m_lod_2bit_renderableIndex_20bit_renderStageBucket_10bit >> 10u) & ((1u << 20u) - 1u);
-	const U32 lod = desc.m_lod_2bit_renderableIndex_20bit_renderStageBucket_10bit >> 30u;
+	const U32 renderStateBucket = desc.m_renderStateBucket;
+	const U32 renderableIdx = desc.m_renderableIndex;
+	const U32 lod = desc.m_lod;
 
 	const GpuSceneRenderable renderable = SBUFF(g_renderables, renderableIdx);
 	const U32 meshLodIndex = renderable.m_meshLodsIndex + lod;
@@ -87,43 +86,34 @@ RWStructuredBuffer<U32> g_outOfMemoryBuffer : register(u4);
 		InterlockedExchange(SBUFF(g_mdiDrawCounts, renderStateBucket), SBUFF(g_firstDrawIndirectArgAndCount, renderStateBucket).y, orig);
 		InterlockedOr(SBUFF(g_outOfMemoryBuffer, 0), 2);
 	}
-	else if(!isParticleEmitter)
+	else
 	{
-		// Regular renderables are always indexed
-
 		DrawIndexedIndirectArgs indirect;
 		indirect.m_indexCount = meshLod.m_indexCount;
-		indirect.m_instanceCount = 1;
 		indirect.m_firstIndex = meshLod.m_firstIndex;
 		indirect.m_vertexOffset = 0;
-		indirect.m_firstInstance = instanceIndex;
+		indirect.m_firstInstance = 0;
+
+		if(isParticleEmitter)
+		{
+			const GpuSceneParticleEmitter emitter = SBUFF(g_particleEmitters, renderable.m_particleEmitterIndex);
+			indirect.m_instanceCount = emitter.m_aliveParticleCount;
+		}
+		else
+		{
+			indirect.m_instanceCount = 1;
+		}
+
 		SBUFF(g_drawIndexedIndirectArgs, indirectArgsIndex) = indirect;
 
-		UVec4 instanceVertex;
-		instanceVertex.x = renderable.m_worldTransformsIndex;
-		instanceVertex.y = renderable.m_constantsOffset;
-		instanceVertex.z = meshLodIndex;
-		instanceVertex.w = renderable.m_boneTransformsOffset;
-		SBUFF(g_instanceRateRenderables, instanceIndex) = instanceVertex;
-	}
-	else
-	{
-		const GpuSceneParticleEmitter emitter = SBUFF(g_particleEmitters, renderable.m_particleEmitterIndex);
-
-		DrawIndirectArgsWithPadding indirect;
-		indirect.m_vertexCount = emitter.m_aliveParticleCount * meshLod.m_indexCount;
-		indirect.m_instanceCount = 1;
-		indirect.m_firstVertex = 0;
-		indirect.m_firstInstance = instanceIndex;
-		indirect.m_padding = 0;
-		SBUFF(g_drawIndirectArgs, indirectArgsIndex) = indirect;
-
-		UVec4 instanceVertex;
-		instanceVertex.x = renderable.m_worldTransformsIndex;
-		instanceVertex.y = renderable.m_constantsOffset;
-		instanceVertex.z = meshLodIndex;
-		instanceVertex.w = renderable.m_particleEmitterIndex;
-		SBUFF(g_instanceRateRenderables, instanceIndex) = instanceVertex;
+		GpuScenePerDraw perDraw;
+		perDraw.m_worldTransformsIndex =renderable.m_worldTransformsIndex;
+		perDraw.m_isParticleEmitter = isParticleEmitter;
+		perDraw.m_particleEmitterIndex = renderable.m_particleEmitterIndex;
+		perDraw.m_constantsOffset = renderable.m_constantsOffset;
+		perDraw.m_meshLodIndex = meshLodIndex;
+		perDraw.m_boneTransformsOffset = renderable.m_boneTransformsOffset;
+		SBUFF(g_perDraw, instanceIndex) = perDraw;
 	}
 }
 
@@ -235,10 +225,10 @@ Bool cullMeshlet(GpuSceneRenderable renderable, const MeshletBoundingVolume mesh
 	{
 		const GpuVisibilityVisibleMeshletDesc desc = SBUFF(g_visibleMeshlets, svDispatchThreadId);
 
-		const U32 renderableIdx = desc.m_renderableIndex_30bit_renderStageBucket_12bit >> 12u;
-		const U32 renderStateBucket = desc.m_renderableIndex_30bit_renderStageBucket_12bit & ((1u << 12u) - 1u);
-		const U32 lod = desc.m_lod_2bit_meshletIndex_30bit >> 30u;
-		const U32 meshletIdx = desc.m_lod_2bit_meshletIndex_30bit & ((1u << 30u) - 1u);
+		const U32 renderableIdx = desc.m_renderableIndex;
+		const U32 renderStateBucket = desc.m_renderStateBucket;
+		const U32 lod = desc.m_lod;
+		const U32 meshletIdx = desc.m_meshletIndex;
 
 		const GpuSceneRenderable renderable = SBUFF(g_renderables, renderableIdx);
 		const GpuSceneMeshLod meshLod = SBUFF(g_meshLods, renderable.m_meshLodsIndex + lod);

+ 0 - 10
AnKi/Shaders/Include/GpuSceneFunctions.h

@@ -9,16 +9,6 @@
 
 ANKI_BEGIN_NAMESPACE
 
-inline GpuSceneRenderableInstance unpackGpuSceneRenderableVertex(UVec4 x)
-{
-	GpuSceneRenderableInstance o;
-	o.m_worldTransformsIndex = x[0];
-	o.m_constantsOffset = x[1];
-	o.m_meshLodIndex = x[2];
-	o.m_boneTransformsOffsetOrParticleEmitterIndex = x[3];
-	return o;
-}
-
 inline GpuSceneMeshletInstance unpackGpuSceneMeshletInstance(UVec4 x)
 {
 	GpuSceneMeshletInstance o;

+ 8 - 6
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -33,17 +33,19 @@ struct GpuSceneRenderable
 	U32 m_padding : 8;
 };
 
-// Almost similar to GpuSceneRenderable but with only what the material shaders need. Needs to fit in a UVec4 vertex attribute.
-struct GpuSceneRenderableInstance
+// Almost similar to GpuSceneRenderable but with only what the material shaders need. Make it as small as possible
+struct GpuScenePerDraw
 {
-	U32 m_worldTransformsIndex;
+	U32 m_worldTransformsIndex : 20;
+	U32 m_particleEmitterIndex : 11;
+	U32 m_isParticleEmitter : 1;
 	U32 m_constantsOffset;
 	U32 m_meshLodIndex; // Points to a single GpuSceneMeshLod in the mesh lods.
-	U32 m_boneTransformsOffsetOrParticleEmitterIndex;
+	U32 m_boneTransformsOffset;
 };
-static_assert(sizeof(GpuSceneRenderableInstance) == sizeof(UVec4));
+static_assert(sizeof(GpuScenePerDraw) == sizeof(UVec4));
 
-// Minimal data passed to the vertex shaders in the case of meshlet rendering.
+// Minimal data passed to the vertex shaders in the case of meshlet rendering (both SW and HW).
 struct GpuSceneMeshletInstance
 {
 	U32 m_worldTransformsIndex_25bit_meshletPrimitiveCount_7bit;

+ 8 - 3
AnKi/Shaders/Include/GpuVisibilityTypes.h

@@ -57,13 +57,18 @@ struct GpuVisibilityHash
 
 struct GpuVisibilityVisibleRenderableDesc
 {
-	U32 m_lod_2bit_renderableIndex_20bit_renderStageBucket_10bit;
+	U32 m_lod : 2;
+	U32 m_renderableIndex : 20;
+	U32 m_renderStateBucket : 10;
 };
 
 struct GpuVisibilityVisibleMeshletDesc
 {
-	U32 m_renderableIndex_30bit_renderStageBucket_12bit;
-	U32 m_lod_2bit_meshletIndex_30bit;
+	// First DWORD: 20 + 12 = 32 bits, matching the old (renderableIdx << 12) | renderStateBucket packing.
+	// The widths must sum to 32 so that both fields share a single U32.
+	U32 m_renderableIndex : 20;
+	U32 m_renderStateBucket : 12;
+
+	// Second DWORD: 2 + 30 = 32 bits.
+	U32 m_lod : 2;
+	U32 m_meshletIndex : 30;
 };
 
 struct GpuVisibilityMeshletConstants

+ 11 - 8
AnKi/Shaders/Include/MaterialTypes.h

@@ -34,20 +34,23 @@ static_assert(sizeof(MaterialGlobalConstants) == 15 * sizeof(Vec4));
 #define ANKI_MATERIAL_REGISTER_TRANSFORMS 7
 #define ANKI_MATERIAL_REGISTER_NEAREST_CLAMP_SAMPLER 1
 #define ANKI_MATERIAL_REGISTER_FIRST_MESHLET 8
+#define ANKI_MATERIAL_REGISTER_PER_DRAW 9
+#define ANKI_MATERIAL_REGISTER_PER_DRAW_OFFSET 10
 
 // For FW shading:
 #define ANKI_MATERIAL_REGISTER_LINEAR_CLAMP_SAMPLER 2
 #define ANKI_MATERIAL_REGISTER_SHADOW_SAMPLER 3
-#define ANKI_MATERIAL_REGISTER_SCENE_DEPTH 9
-#define ANKI_MATERIAL_REGISTER_LIGHT_VOLUME 10
 #define ANKI_MATERIAL_REGISTER_CLUSTER_SHADING_CONSTANTS 1
-#define ANKI_MATERIAL_REGISTER_CLUSTER_SHADING_POINT_LIGHTS 11
-#define ANKI_MATERIAL_REGISTER_CLUSTER_SHADING_SPOT_LIGHTS 12
-#define ANKI_MATERIAL_REGISTER_SHADOW_ATLAS 13
-#define ANKI_MATERIAL_REGISTER_CLUSTERS 14
-
+#define ANKI_MATERIAL_REGISTER_SCENE_DEPTH 11
+#define ANKI_MATERIAL_REGISTER_LIGHT_VOLUME 12
+#define ANKI_MATERIAL_REGISTER_CLUSTER_SHADING_POINT_LIGHTS 13
+#define ANKI_MATERIAL_REGISTER_CLUSTER_SHADING_SPOT_LIGHTS 14
+#define ANKI_MATERIAL_REGISTER_SHADOW_ATLAS 15
+#define ANKI_MATERIAL_REGISTER_CLUSTERS 16
+
+#define ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY 17
 // Always last because it's variable. Texture buffer bindings pointing to unified geom buffer:
 // !!WARNING!! Remember to update the UnifiedGeometryTypes.def.h if you change that one
-#define ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START 15
+#define ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_TYPED_BUFFER_START 18
 
 ANKI_END_NAMESPACE

+ 2 - 1
AnKi/Shaders/Include/MeshTypes.h

@@ -102,7 +102,8 @@ struct MeshletGeometryDescriptor
 {
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_firstPrimitive; // In size of kMeshletPrimitiveFormat
-	U32 m_primitiveCount_R16_Uint_vertexCount_R16_Uint;
+	U32 m_primitiveCount : 16;
+	U32 m_vertexCount : 16;
 	F32 m_positionScale;
 
 	Vec3 m_positionTranslation;

+ 6 - 6
AnKi/Shaders/Include/UnifiedGeometryTypes.def.h

@@ -11,17 +11,17 @@
 
 // !!!! ALL FORMATS NEED TO BE AT LEAST 4 BYTES, else we can't address large typed buffers !!!!
 
-ANKI_UNIFIED_GEOM_FORMAT(R32G32_Sfloat, Vec2, 16)
+ANKI_UNIFIED_GEOM_FORMAT(R32G32_Sfloat, Vec2, 18)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R32G32B32_Sfloat, Vec3, 17)
+ANKI_UNIFIED_GEOM_FORMAT(R32G32B32_Sfloat, Vec3, 19)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R32G32B32A32_Sfloat, Vec4, 18)
+ANKI_UNIFIED_GEOM_FORMAT(R32G32B32A32_Sfloat, Vec4, 20)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R16G16B16A16_Unorm, Vec4, 19)
+ANKI_UNIFIED_GEOM_FORMAT(R16G16B16A16_Unorm, Vec4, 21)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R8G8B8A8_Snorm, Vec4, 20)
+ANKI_UNIFIED_GEOM_FORMAT(R8G8B8A8_Snorm, Vec4, 22)
 ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR
-ANKI_UNIFIED_GEOM_FORMAT(R8G8B8A8_Uint, UVec4, 21)
+ANKI_UNIFIED_GEOM_FORMAT(R8G8B8A8_Uint, UVec4, 23)
 
 #undef ANKI_UNIFIED_GEOM_FORMAT
 #undef ANKI_UNIFIED_GEOM_FORMAT_SEPERATOR

+ 16 - 3
AnKi/Shaders/MaterialShadersCommon.hlsl

@@ -21,7 +21,7 @@ ConstantBuffer<MaterialGlobalConstants> g_globalConstants : register(ANKI_REG(b,
 ByteAddressBuffer g_gpuScene : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_GPU_SCENE));
 
 // Unified geom:
-ByteAddressBuffer g_unifiedGeom : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY_START));
+ByteAddressBuffer g_unifiedGeom : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_UNIFIED_GEOMETRY));
 #define ANKI_UNIFIED_GEOM_FORMAT(fmt, shaderType, reg) Buffer<shaderType> g_unifiedGeom_##fmt : register(ANKI_REG(t, reg));
 #include <AnKi/Shaders/Include/UnifiedGeometryTypes.def.h>
 
@@ -34,12 +34,18 @@ StructuredBuffer<GpuSceneParticleEmitter> g_particleEmitters : register(ANKI_REG
 StructuredBuffer<Mat3x4> g_transforms : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_TRANSFORMS));
 SamplerState g_nearestClampSampler : register(ANKI_REG(s, ANKI_MATERIAL_REGISTER_NEAREST_CLAMP_SAMPLER));
 StructuredBuffer<U32> g_firstMeshlet : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_FIRST_MESHLET));
+StructuredBuffer<GpuScenePerDraw> g_perDraw : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_PER_DRAW));
 
-#if ANKI_MESH_SHADER
+// One for each bucket. Points to g_perDraw
+StructuredBuffer<U32> g_firstPerDraw : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_PER_DRAW_OFFSET));
+
+#if ANKI_MESH_SHADER || ANKI_VERTEX_SHADER
 struct Consts
 {
-	UVec3 m_padding;
 	U32 m_bucketIndex;
+	U32 m_padding1;
+	U32 m_padding2;
+	U32 m_padding3;
 };
 ANKI_FAST_CONSTANTS(Consts, g_consts)
 #endif
@@ -62,6 +68,13 @@ Texture2D<Vec4> g_shadowAtlasTex : register(ANKI_REG(t, ANKI_MATERIAL_REGISTER_S
 
 #undef ANKI_REG
 
+#if ANKI_VERTEX_SHADER
+GpuScenePerDraw getGpuScenePerDraw()
+{
+	return g_perDraw[gl_DrawID + g_firstPerDraw[g_consts.m_bucketIndex]];
+}
+#endif
+
 /// Used in vert shading.
 UnpackedMeshVertex loadVertex(GpuSceneMeshLod mlod, U32 svVertexId, Bool bones)
 {

+ 17 - 13
AnKi/Shaders/RtSbtBuild.ankiprog

@@ -23,7 +23,7 @@ StructuredBuffer<LodAndRenderableIndex> g_visibleRenderables : register(t1); //
 
 StructuredBuffer<U32> g_shaderHandles : register(t2);
 
-RWStructuredBuffer<U32> g_sbtBuffer : register(u0);
+RWByteAddressBuffer g_sbtBuffer : register(u0);
 
 ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
 
@@ -39,10 +39,10 @@ ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
 		if(dword < g_consts.m_shaderHandleDwordSize)
 		{
 			const U32 raygenHandleDwordOffset = g_consts.m_raygenHandleIndex * g_consts.m_shaderHandleDwordSize + dword;
-			SBUFF(g_sbtBuffer, dword) = SBUFF(g_shaderHandles, raygenHandleDwordOffset);
+			BAB_STORE(g_sbtBuffer, U32, dword * 4, SBUFF(g_shaderHandles, raygenHandleDwordOffset));
 
 			const U32 missHandleDwordOffset = g_consts.m_missHandleIndex * g_consts.m_shaderHandleDwordSize + dword;
-			SBUFF(g_sbtBuffer, dword + g_consts.m_sbtRecordDwordSize) = SBUFF(g_shaderHandles, missHandleDwordOffset);
+			BAB_STORE(g_sbtBuffer, U32, (dword + g_consts.m_sbtRecordDwordSize) * 4, SBUFF(g_shaderHandles, missHandleDwordOffset));
 		}
 	}
 
@@ -65,26 +65,30 @@ ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
 	{
 		const U32 shaderHandleIdx = (TECHNIQUE == 0) ? renderable.m_rtShadowsShaderHandleIndex : renderable.m_rtMaterialFetchShaderHandleIndex;
 		const U32 offset = shaderHandleIdx * g_consts.m_shaderHandleDwordSize + i;
-		SBUFF(g_sbtBuffer, sbtDwordOffset) = SBUFF(g_shaderHandles, offset);
+		BAB_STORE(g_sbtBuffer, U32, sbtDwordOffset * 4, SBUFF(g_shaderHandles, offset));
 		++sbtDwordOffset;
 	}
 
-	// Copy the GpuSceneRenderableInstance
-	SBUFF(g_sbtBuffer, sbtDwordOffset++) = renderable.m_worldTransformsIndex;
-	SBUFF(g_sbtBuffer, sbtDwordOffset++) = renderable.m_constantsOffset;
-	SBUFF(g_sbtBuffer, sbtDwordOffset++) = renderable.m_meshLodsIndex + lod;
-	SBUFF(g_sbtBuffer, sbtDwordOffset) = 0;
+	// Copy the GpuScenePerDraw
+	GpuScenePerDraw perDraw;
+	perDraw.m_worldTransformsIndex = renderable.m_worldTransformsIndex;
+	perDraw.m_particleEmitterIndex = renderable.m_particleEmitterIndex;
+	perDraw.m_isParticleEmitter = renderable.m_particleEmitterIndex != kMaxU32;
+	perDraw.m_constantsOffset = renderable.m_constantsOffset;
+	perDraw.m_meshLodIndex = renderable.m_meshLodsIndex + lod;
+	perDraw.m_boneTransformsOffset = 0;
+	BAB_STORE(g_sbtBuffer, GpuScenePerDraw, sbtDwordOffset * 4, perDraw);
 }
 #endif
 
 // ===========================================================================
 // PatchRaygenAndMiss                                                        =
 // ===========================================================================
-#if NOT_ZERO(ANKI_TECHNIQUE_PatchRaygenAndMiss)
+#if ANKI_TECHNIQUE_PatchRaygenAndMiss
 
 StructuredBuffer<U32> g_shaderHandles : register(t0);
 
-RWStructuredBuffer<U32> g_sbtBuffer : register(u0);
+RWByteAddressBuffer g_sbtBuffer : register(u0);
 
 ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
 
@@ -95,10 +99,10 @@ ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
 	if(dword < g_consts.m_shaderHandleDwordSize)
 	{
 		const U32 raygenHandleDwordOffset = g_consts.m_raygenHandleIndex * g_consts.m_shaderHandleDwordSize + dword;
-		SBUFF(g_sbtBuffer, dword) = SBUFF(g_shaderHandles, raygenHandleDwordOffset);
+		BAB_STORE(g_sbtBuffer, U32, dword * 4, SBUFF(g_shaderHandles, raygenHandleDwordOffset));
 
 		const U32 missHandleDwordOffset = g_consts.m_missHandleIndex * g_consts.m_shaderHandleDwordSize + dword;
-		SBUFF(g_sbtBuffer, dword + g_consts.m_sbtRecordDwordSize) = SBUFF(g_shaderHandles, missHandleDwordOffset);
+		BAB_STORE(g_sbtBuffer, U32, (dword + g_consts.m_sbtRecordDwordSize) * 4, SBUFF(g_shaderHandles, missHandleDwordOffset));
 	}
 }
 #endif

+ 1 - 1
AnKi/Shaders/RtShadowsSbtBuild.ankiprog

@@ -41,7 +41,7 @@ ANKI_FAST_CONSTANTS(RtShadowsSbtBuildConstants, g_consts)
 		++sbtDwordOffset;
 	}
 
-	// Copy the GpuSceneRenderableInstance
+	// Copy the GpuScenePerDraw
 	g_sbtBuffer[sbtDwordOffset++] = renderable.m_worldTransformsIndex;
 	g_sbtBuffer[sbtDwordOffset++] = renderable.m_constantsOffset;
 	g_sbtBuffer[sbtDwordOffset++] = renderable.m_meshLodsIndex;

+ 4 - 0
AnKi/Util/CVarSet.h

@@ -113,6 +113,10 @@ public:
 		else
 		{
 			ok = m_checkValueCallback(val);
+			if(ok)
+			{
+				m_value = val;
+			}
 		}
 
 		if(!ok)

+ 12 - 6
AnKi/Window/InputAndroid.cpp

@@ -43,12 +43,16 @@ Error Input::init()
 
 Error Input::handleEvents()
 {
-	for(U32& k : m_touchPointers)
+	for(I32& k : m_touchPointers)
 	{
-		if(k)
+		if(k > 0)
 		{
 			++k;
 		}
+		else if(k < 0)
+		{
+			k = 0;
+		}
 	}
 
 	int ident;
@@ -66,10 +70,9 @@ Error Input::handleEvents()
 	return Error::kNone;
 }
 
-void Input::moveCursor(const Vec2& posNdc)
+void Input::moveMouseNdc(const Vec2& posNdc)
 {
 	m_mousePosNdc = posNdc;
-	m_mousePosWin = UVec2((posNdc * 0.5f + 0.5f) * Vec2(F32(NativeWindow::getSingleton().getWidth()), F32(NativeWindow::getSingleton().getHeight())));
 }
 
 void Input::hideCursor([[maybe_unused]] Bool hide)
@@ -82,6 +85,11 @@ Bool Input::hasTouchDevice() const
 	return true;
 }
 
+void Input::setMouseCursor([[maybe_unused]] MouseCursor cursor)
+{
+	// nothing
+}
+
 Error InputAndroid::initInternal()
 {
 	g_androidApp->userData = this;
@@ -139,8 +147,6 @@ int InputAndroid::handleAndroidInput([[maybe_unused]] android_app* app, AInputEv
 				const F32 y = AMotionEvent_getY(event, index);
 				const I32 id = AMotionEvent_getPointerId(event, index);
 
-				m_touchPointerPosWin[id] = UVec2(U32(x), U32(y));
-
 				m_touchPointerPosNdc[id].x() = F32(x) / F32(NativeWindow::getSingleton().getWidth()) * 2.0f - 1.0f;
 				m_touchPointerPosNdc[id].y() = -(F32(y) / F32(NativeWindow::getSingleton().getHeight()) * 2.0f - 1.0f);
 

+ 2 - 2
AnKi/Window/InputSdl.cpp

@@ -166,7 +166,7 @@ Error InputSdl::handleEventsInternal()
 	m_textInput[0] = '\0';
 
 	// add the times a key is being pressed
-	for(auto& k : m_keys)
+	for(I32& k : m_keys)
 	{
 		if(k > 0)
 		{
@@ -177,7 +177,7 @@ Error InputSdl::handleEventsInternal()
 			k = 0;
 		}
 	}
-	for(auto& k : m_mouseBtns)
+	for(I32& k : m_mouseBtns)
 	{
 		if(k > 0)
 		{

+ 5 - 5
AnKi/Window/NativeWindowAndroid.cpp

@@ -32,9 +32,9 @@ void MakeSingletonPtr<NativeWindow>::freeSingleton()
 	}
 }
 
-Error NativeWindow::init(const NativeWindowInitInfo& inf)
+Error NativeWindow::init(U32 targetFps, CString title)
 {
-	return static_cast<NativeWindowAndroid*>(this)->initInternal(inf);
+	return static_cast<NativeWindowAndroid*>(this)->initInternal(targetFps, title);
 }
 
 void NativeWindow::setWindowTitle([[maybe_unused]] CString title)
@@ -66,7 +66,7 @@ NativeWindowAndroid::~NativeWindowAndroid()
 	m_nativeWindowAndroid = nullptr;
 }
 
-Error NativeWindowAndroid::initInternal([[maybe_unused]] const NativeWindowInitInfo& init)
+Error NativeWindowAndroid::initInternal(U32 targetFps, CString title)
 {
 	ANKI_WIND_LOGI("Initializing Android window");
 
@@ -89,9 +89,9 @@ Error NativeWindowAndroid::initInternal([[maybe_unused]] const NativeWindowInitI
 
 	m_nativeWindowAndroid = g_androidApp->window;
 
-	if(init.m_targetFps)
+	if(targetFps)
 	{
-		ANativeWindow_setFrameRate(m_nativeWindowAndroid, F32(init.m_targetFps), ANATIVEWINDOW_FRAME_RATE_COMPATIBILITY_DEFAULT);
+		ANativeWindow_setFrameRate(m_nativeWindowAndroid, F32(targetFps), ANATIVEWINDOW_FRAME_RATE_COMPATIBILITY_DEFAULT);
 	}
 
 	// Set some stuff

+ 1 - 1
AnKi/Window/NativeWindowAndroid.h

@@ -18,7 +18,7 @@ public:
 
 	~NativeWindowAndroid();
 
-	Error initInternal(const NativeWindowInitInfo& init);
+	Error initInternal(U32 targetFps, CString title);
 };
 
 } // end namespace anki

+ 1 - 1
Sandbox/Main.cpp

@@ -42,7 +42,7 @@ Error MyApp::userPreInit()
 
 	// Config
 #if ANKI_OS_ANDROID
-	ANKI_CHECK(CVarSet::getSingleton().setFromCommandLineArguments(argc - 1, argv + 1));
+	ANKI_CHECK(CVarSet::getSingleton().setFromCommandLineArguments(m_argc - 1, m_argv + 1));
 #else
 	ANKI_CHECK(CVarSet::getSingleton().setFromCommandLineArguments(m_argc - 2, m_argv + 2));
 #endif