Browse Source

Optimizations

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
61cae637d1

+ 15 - 15
AnKi/Scene/Components/ModelComponent.cpp

@@ -24,7 +24,7 @@ ModelComponent::~ModelComponent()
 	m_modelPatchMergeKeys.destroy(m_node->getMemoryPool());
 	m_modelPatchMergeKeys.destroy(m_node->getMemoryPool());
 
 
 	GpuSceneMemoryPool& gpuScene = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
 	GpuSceneMemoryPool& gpuScene = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
-	gpuScene.free(m_gpuSceneMeshGpuViews);
+	gpuScene.free(m_gpuSceneMeshLods);
 	gpuScene.free(m_gpuSceneUniforms);
 	gpuScene.free(m_gpuSceneUniforms);
 
 
 	m_gpuSceneUniformsOffsetPerPatch.destroy(m_node->getMemoryPool());
 	m_gpuSceneUniformsOffsetPerPatch.destroy(m_node->getMemoryPool());
@@ -53,8 +53,8 @@ Error ModelComponent::loadModelResource(CString filename)
 	// GPU scene allocations
 	// GPU scene allocations
 	GpuSceneMemoryPool& gpuScene = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
 	GpuSceneMemoryPool& gpuScene = *getExternalSubsystems(*m_node).m_gpuSceneMemoryPool;
 
 
-	gpuScene.free(m_gpuSceneMeshGpuViews);
-	gpuScene.allocate(sizeof(GpuSceneMesh) * m_modelPatchMergeKeys.getSize(), 4, m_gpuSceneMeshGpuViews);
+	gpuScene.free(m_gpuSceneMeshLods);
+	gpuScene.allocate(sizeof(GpuSceneMeshLod) * kMaxLodCount * m_modelPatchMergeKeys.getSize(), 4, m_gpuSceneMeshLods);
 
 
 	U32 uniformsSize = 0;
 	U32 uniformsSize = 0;
 	m_gpuSceneUniformsOffsetPerPatch.resize(m_node->getMemoryPool(), modelPatchCount);
 	m_gpuSceneUniformsOffsetPerPatch.resize(m_node->getMemoryPool(), modelPatchCount);
@@ -86,19 +86,19 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 
 
 		// Upload the mesh views
 		// Upload the mesh views
 		const U32 modelPatchCount = m_model->getModelPatches().getSize();
 		const U32 modelPatchCount = m_model->getModelPatches().getSize();
-		DynamicArrayRaii<GpuSceneMesh> meshViews(info.m_framePool, modelPatchCount);
+		DynamicArrayRaii<GpuSceneMeshLod> meshLods(info.m_framePool, modelPatchCount * kMaxLodCount);
 		for(U32 i = 0; i < modelPatchCount; ++i)
 		for(U32 i = 0; i < modelPatchCount; ++i)
 		{
 		{
-			GpuSceneMesh& view = meshViews[i];
 			const ModelPatch& patch = m_model->getModelPatches()[i];
 			const ModelPatch& patch = m_model->getModelPatches()[i];
 			const MeshResource& mesh = *patch.getMesh();
 			const MeshResource& mesh = *patch.getMesh();
 
 
-			zeroMemory(view);
-			view.m_positionScale = mesh.getPositionsScale();
-			view.m_positionTranslation = mesh.getPositionsTranslation();
-
 			for(U32 l = 0; l < mesh.getLodCount(); ++l)
 			for(U32 l = 0; l < mesh.getLodCount(); ++l)
 			{
 			{
+				GpuSceneMeshLod& meshLod = meshLods[i * kMaxLodCount + l];
+				meshLod = {};
+				meshLod.m_positionScale = mesh.getPositionsScale();
+				meshLod.m_positionTranslation = mesh.getPositionsTranslation();
+
 				for(VertexStreamId stream = VertexStreamId::kPosition; stream <= VertexStreamId::kBoneWeights; ++stream)
 				for(VertexStreamId stream = VertexStreamId::kPosition; stream <= VertexStreamId::kBoneWeights; ++stream)
 				{
 				{
 					if(!mesh.isVertexStreamPresent(stream))
 					if(!mesh.isVertexStreamPresent(stream))
@@ -113,26 +113,26 @@ Error ModelComponent::update(SceneComponentUpdateInfo& info, Bool& updated)
 					const PtrSize elementSize = getFormatInfo(kMeshRelatedVertexStreamFormats[stream]).m_texelSize;
 					const PtrSize elementSize = getFormatInfo(kMeshRelatedVertexStreamFormats[stream]).m_texelSize;
 
 
 					ANKI_ASSERT((offset % elementSize) == 0);
 					ANKI_ASSERT((offset % elementSize) == 0);
-					view.m_lods[l].m_vertexOffsets[U32(stream)] = U32(offset / elementSize);
+					meshLod.m_vertexOffsets[U32(stream)] = U32(offset / elementSize);
 				}
 				}
 
 
 				PtrSize offset;
 				PtrSize offset;
 				U32 indexCount;
 				U32 indexCount;
 				IndexType indexType;
 				IndexType indexType;
 				mesh.getIndexBufferInfo(l, offset, indexCount, indexType);
 				mesh.getIndexBufferInfo(l, offset, indexCount, indexType);
-				view.m_lods[l].m_indexOffset = U32(offset);
-				view.m_lods[l].m_indexCount = indexCount;
+				meshLod.m_indexOffset = U32(offset);
+				meshLod.m_indexCount = indexCount;
 			}
 			}
 
 
 			// Copy the last LOD to the rest just in case
 			// Copy the last LOD to the rest just in case
 			for(U32 l = mesh.getLodCount(); l < kMaxLodCount; ++l)
 			for(U32 l = mesh.getLodCount(); l < kMaxLodCount; ++l)
 			{
 			{
-				view.m_lods[l] = view.m_lods[l - 1];
+				meshLods[i * kMaxLodCount + l] = meshLods[i * kMaxLodCount + (l - 1)];
 			}
 			}
 		}
 		}
 
 
-		gpuScenePatcher.newCopy(*info.m_framePool, m_gpuSceneMeshGpuViews.m_offset, meshViews.getSizeInBytes(),
-								&meshViews[0]);
+		gpuScenePatcher.newCopy(*info.m_framePool, m_gpuSceneMeshLods.m_offset, meshLods.getSizeInBytes(),
+								&meshLods[0]);
 
 
 		// Upload the uniforms
 		// Upload the uniforms
 		DynamicArrayRaii<U32> allUniforms(info.m_framePool, U32(m_gpuSceneUniforms.m_size / 4));
 		DynamicArrayRaii<U32> allUniforms(info.m_framePool, U32(m_gpuSceneUniforms.m_size / 4));

+ 3 - 3
AnKi/Scene/Components/ModelComponent.h

@@ -41,10 +41,10 @@ public:
 		return m_model.isCreated();
 		return m_model.isCreated();
 	}
 	}
 
 
-	U32 getMeshViewsGpuSceneOffset() const
+	// Offset of the m_gpuSceneMeshLods token inside the GPU scene memory pool. The token holds kMaxLodCount
+	// GpuSceneMeshLod entries per model patch. The assert must use the renamed token: m_gpuSceneMeshGpuViews no
+	// longer exists in this class.
+	U32 getGpuSceneMeshLodsOffset() const
 	{
 	{
-		ANKI_ASSERT((m_gpuSceneMeshGpuViews.m_offset % 4) == 0);
+		ANKI_ASSERT((m_gpuSceneMeshLods.m_offset % 4) == 0);
-		return U32(m_gpuSceneMeshGpuViews.m_offset);
+		return U32(m_gpuSceneMeshLods.m_offset);
 	}
 	}
 
 
 	U32 getUniformsGpuSceneOffset(U32 meshPatchIdx) const
 	U32 getUniformsGpuSceneOffset(U32 meshPatchIdx) const
@@ -59,7 +59,7 @@ private:
 	DynamicArray<U64> m_modelPatchMergeKeys;
 	DynamicArray<U64> m_modelPatchMergeKeys;
 	Bool m_dirty = true;
 	Bool m_dirty = true;
 
 
-	SegregatedListsGpuMemoryPoolToken m_gpuSceneMeshGpuViews;
+	SegregatedListsGpuMemoryPoolToken m_gpuSceneMeshLods;
 	SegregatedListsGpuMemoryPoolToken m_gpuSceneUniforms;
 	SegregatedListsGpuMemoryPoolToken m_gpuSceneUniforms;
 	DynamicArray<U32> m_gpuSceneUniformsOffsetPerPatch;
 	DynamicArray<U32> m_gpuSceneUniformsOffsetPerPatch;
 
 

+ 2 - 2
AnKi/Scene/ModelNode.cpp

@@ -198,8 +198,8 @@ void ModelNode::initRenderComponents()
 		renderable.m_worldTransformsOffset = getFirstComponentOfType<MoveComponent>().getTransformsGpuSceneOffset();
 		renderable.m_worldTransformsOffset = getFirstComponentOfType<MoveComponent>().getTransformsGpuSceneOffset();
 		renderable.m_aabbOffset = getFirstComponentOfType<SpatialComponent>().getAabbGpuSceneOffset();
 		renderable.m_aabbOffset = getFirstComponentOfType<SpatialComponent>().getAabbGpuSceneOffset();
 		renderable.m_uniformsOffset = getFirstComponentOfType<ModelComponent>().getUniformsGpuSceneOffset(patchIdx);
 		renderable.m_uniformsOffset = getFirstComponentOfType<ModelComponent>().getUniformsGpuSceneOffset(patchIdx);
-		renderable.m_geometryOffset =
-			getFirstComponentOfType<ModelComponent>().getMeshViewsGpuSceneOffset() + sizeof(GpuSceneMesh) * patchIdx;
+		renderable.m_geometryOffset = getFirstComponentOfType<ModelComponent>().getGpuSceneMeshLodsOffset()
+									  + sizeof(GpuSceneMeshLod) * kMaxLodCount * patchIdx;
 		if(skinc.isEnabled())
 		if(skinc.isEnabled())
 		{
 		{
 			renderable.m_boneTransformsOffset = skinc.getBoneTransformsGpuSceneOffset();
 			renderable.m_boneTransformsOffset = skinc.getBoneTransformsGpuSceneOffset();

+ 3 - 2
AnKi/Shaders/ForwardShadingGenericTransparent.ankiprog

@@ -42,9 +42,10 @@ VertOut main(VertIn input)
 	const UnpackedGpuSceneRenderableInstance instance =
 	const UnpackedGpuSceneRenderableInstance instance =
 		unpackRenderableGpuViewInstance(input.m_gpuSceneRenderableInstance);
 		unpackRenderableGpuViewInstance(input.m_gpuSceneRenderableInstance);
 	const GpuSceneRenderable renderable = g_gpuScene.Load<GpuSceneRenderable>(instance.m_renderableOffset);
 	const GpuSceneRenderable renderable = g_gpuScene.Load<GpuSceneRenderable>(instance.m_renderableOffset);
-	const GpuSceneMesh mesh = g_gpuScene.Load<GpuSceneMesh>(renderable.m_geometryOffset);
+	// m_geometryOffset addresses this renderable's first GpuSceneMeshLod entry; the entries of the remaining LODs
+	// follow it back to back, so step forward by one entry per LOD (add, never multiply, the base offset).
+	const GpuSceneMeshLod mesh =
+		g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_geometryOffset + sizeof(GpuSceneMeshLod) * instance.m_lod);
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
-	const UnpackedMeshVertex vertex = loadVertex(mesh, instance.m_lod, input.m_svVertexId, false);
+	const UnpackedMeshVertex vertex = loadVertex(mesh, input.m_svVertexId, false);
 
 
 	output.m_worldPosition = mul(worldTransform, Vec4(vertex.m_position, 1.0));
 	output.m_worldPosition = mul(worldTransform, Vec4(vertex.m_position, 1.0));
 
 

+ 3 - 2
AnKi/Shaders/GBufferGeneric.ankiprog

@@ -175,8 +175,9 @@ VertOut main(VertIn input)
 	const UnpackedGpuSceneRenderableInstance instance =
 	const UnpackedGpuSceneRenderableInstance instance =
 		unpackRenderableGpuViewInstance(input.m_gpuSceneRenderableInstance);
 		unpackRenderableGpuViewInstance(input.m_gpuSceneRenderableInstance);
 	const GpuSceneRenderable renderable = g_gpuScene.Load<GpuSceneRenderable>(instance.m_renderableOffset);
 	const GpuSceneRenderable renderable = g_gpuScene.Load<GpuSceneRenderable>(instance.m_renderableOffset);
-	const GpuSceneMesh mesh = g_gpuScene.Load<GpuSceneMesh>(renderable.m_geometryOffset);
-	UnpackedMeshVertex vert = loadVertex(mesh, instance.m_lod, input.m_svVertexId, ANKI_BONES);
+	const GpuSceneMeshLod mesh =
+		g_gpuScene.Load<GpuSceneMeshLod>(renderable.m_geometryOffset + sizeof(GpuSceneMeshLod) * instance.m_lod);
+	UnpackedMeshVertex vert = loadVertex(mesh, input.m_svVertexId, ANKI_BONES);
 
 
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 	const Mat3x4 worldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset);
 	const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset + sizeof(Mat3x4));
 	const Mat3x4 prevWorldTransform = g_gpuScene.Load<Mat3x4>(renderable.m_worldTransformsOffset + sizeof(Mat3x4));

+ 1 - 7
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -28,17 +28,11 @@ struct GpuSceneMeshLod
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_vertexOffsets[(U32)VertexStreamId::kMeshRelatedCount];
 	U32 m_indexCount;
 	U32 m_indexCount;
 	U32 m_indexOffset; // TODO Decide on its type
 	U32 m_indexOffset; // TODO Decide on its type
-};
-static_assert(sizeof(GpuSceneMeshLod) == sizeof(Vec4) * 2);
-
-struct GpuSceneMesh
-{
-	GpuSceneMeshLod m_lods[kMaxLodCount];
 
 
 	Vec3 m_positionTranslation;
 	Vec3 m_positionTranslation;
 	F32 m_positionScale;
 	F32 m_positionScale;
 };
 };
-static_assert(sizeof(GpuSceneMesh) == sizeof(Vec4) * (kMaxLodCount * 2 + 1));
+static_assert(sizeof(GpuSceneMeshLod) == sizeof(Vec4) * 3);
 
 
 struct GpuSceneParticles
 struct GpuSceneParticles
 {
 {

+ 2 - 4
AnKi/Shaders/MaterialShadersCommon.hlsl

@@ -46,13 +46,11 @@ _ANKI_DEFINE_BINDING(R8G8B8A8_Uint, UVec4)
 #	include <AnKi/Shaders/ClusteredShadingCommon.hlsl>
 #	include <AnKi/Shaders/ClusteredShadingCommon.hlsl>
 #endif
 #endif
 
 
-UnpackedMeshVertex loadVertex(GpuSceneMesh mesh, U32 lod, U32 svVertexId, Bool bones)
+UnpackedMeshVertex loadVertex(GpuSceneMeshLod mlod, U32 svVertexId, Bool bones)
 {
 {
-	GpuSceneMeshLod mlod = mesh.m_lods[lod];
-
 	UnpackedMeshVertex v;
 	UnpackedMeshVertex v;
 	v.m_position = g_unifiedGeom_R16G16B16A16_Unorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kPosition] + svVertexId];
 	v.m_position = g_unifiedGeom_R16G16B16A16_Unorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kPosition] + svVertexId];
-	v.m_position = v.m_position * mesh.m_positionScale + mesh.m_positionTranslation;
+	v.m_position = v.m_position * mlod.m_positionScale + mlod.m_positionTranslation;
 
 
 	v.m_normal = g_unifiedGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kNormal] + svVertexId].xyz;
 	v.m_normal = g_unifiedGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kNormal] + svVertexId].xyz;
 	v.m_tangent = g_unifiedGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kTangent] + svVertexId];
 	v.m_tangent = g_unifiedGeom_R8G8B8A8_Snorm[mlod.m_vertexOffsets[(U32)VertexStreamId::kTangent] + svVertexId];