Browse Source

Optimize GPU visibility a bit and fix a bug in shader reflection

Panagiotis Christopoulos Charitos 2 years ago
parent
commit
31aeb102f2

+ 2 - 1
AnKi/Gr/Vulkan/ShaderImpl.cpp

@@ -140,7 +140,7 @@ void ShaderImpl::doReflection(ConstWeakArray<U8> spirv, SpecConstsVector& specCo
 	Array<U32, kMaxDescriptorSets> counts = {};
 	Array<U32, kMaxDescriptorSets> counts = {};
 	Array2d<DescriptorBinding, kMaxDescriptorSets, kMaxBindingsPerDescriptorSet> descriptors;
 	Array2d<DescriptorBinding, kMaxDescriptorSets, kMaxBindingsPerDescriptorSet> descriptors;
 
 
-	auto func = [&](const spirv_cross::SmallVector<spirv_cross::Resource>& resources, DescriptorType type) -> void {
+	auto func = [&](const spirv_cross::SmallVector<spirv_cross::Resource>& resources, const DescriptorType origType) -> void {
 		for(const spirv_cross::Resource& r : resources)
 		for(const spirv_cross::Resource& r : resources)
 		{
 		{
 			const U32 id = r.id;
 			const U32 id = r.id;
@@ -162,6 +162,7 @@ void ShaderImpl::doReflection(ConstWeakArray<U8> spirv, SpecConstsVector& specCo
 			m_activeBindingMask[set].set(set);
 			m_activeBindingMask[set].set(set);
 
 
 			// Images are special, they might be texel buffers
 			// Images are special, they might be texel buffers
+			DescriptorType type = origType;
 			if(type == DescriptorType::kTexture)
 			if(type == DescriptorType::kTexture)
 			{
 			{
 				if(typeInfo.image.dim == spv::DimBuffer && typeInfo.image.sampled == 1)
 				if(typeInfo.image.dim == spv::DimBuffer && typeInfo.image.sampled == 1)

+ 1 - 1
AnKi/Shaders/ClusterBinning.ankiprog

@@ -10,7 +10,7 @@
 #pragma anki start comp
 #pragma anki start comp
 
 
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
-#include <AnKi/Shaders/CollisionFunctions.hlsl>
+#include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
 
 ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 1u);
 ANKI_SPECIALIZATION_CONSTANT_U32(kZSplitCount, 1u);
 
 

+ 2 - 1
AnKi/Shaders/DbgRenderables.ankiprog

@@ -40,7 +40,8 @@ VertOut main(VertIn input)
 	if(input.m_svInstanceId < bvolumeCount)
 	if(input.m_svInstanceId < bvolumeCount)
 	{
 	{
 		const GpuSceneRenderableBoundingVolume bvol = g_renderableBoundingVolumes[g_visibleRenderableBoundingVolumeIndices[input.m_svInstanceId + 1]];
 		const GpuSceneRenderableBoundingVolume bvol = g_renderableBoundingVolumes[g_visibleRenderableBoundingVolumeIndices[input.m_svInstanceId + 1]];
-		Vec3 localPos = input.m_position * bvol.m_aabbExtend + bvol.m_sphereCenter;
+		const Vec3 boxCenter = (bvol.m_aabbMin + bvol.m_aabbMax) * 0.5f;
+		Vec3 localPos = input.m_position * (bvol.m_aabbMax - boxCenter) + boxCenter;
 		output.m_svPosition = mul(g_consts.m_viewProjMat, Vec4(localPos, 1.0));
 		output.m_svPosition = mul(g_consts.m_viewProjMat, Vec4(localPos, 1.0));
 	}
 	}
 	else
 	else

+ 12 - 25
AnKi/Shaders/GpuVisibility.ankiprog

@@ -15,7 +15,7 @@
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
-#include <AnKi/Shaders/CollisionFunctions.hlsl>
+#include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
 
 struct DrawIndirectArgsWithPadding
 struct DrawIndirectArgsWithPadding
 {
 {
@@ -78,36 +78,23 @@ struct DrawIndirectArgsWithPadding
 
 
 	const GpuSceneRenderableBoundingVolume bvolume = g_renderableBoundingVolumes[bvolumeIdx];
 	const GpuSceneRenderableBoundingVolume bvolume = g_renderableBoundingVolumes[bvolumeIdx];
 
 
+	const Vec3 sphereCenter = (bvolume.m_aabbMin + bvolume.m_aabbMax) * 0.5f;
+	const F32 sphereRadius = bvolume.m_sphereRadius;
+
 #if DISTANCE_TEST == 0
 #if DISTANCE_TEST == 0
 	// Frustum test
 	// Frustum test
 	//
 	//
-	if(!frustumTest(g_consts.m_clipPlanes, bvolume.m_sphereCenter, bvolume.m_sphereRadius))
+	if(!frustumTest(g_consts.m_clipPlanes, sphereCenter, sphereRadius))
 	{
 	{
 		return;
 		return;
 	}
 	}
 
 
 	// Screen-space AABB calculation and checking
 	// Screen-space AABB calculation and checking
 	//
 	//
-	const Vec3 A = bvolume.m_sphereCenter - bvolume.m_aabbExtend;
-	const Vec3 B = bvolume.m_sphereCenter + bvolume.m_aabbExtend;
-	const Vec3 aabbEdges[8u] = {Vec3(A.x, A.y, A.z), Vec3(B.x, A.y, A.z), Vec3(A.x, B.y, A.z), Vec3(A.x, A.y, B.z),
-								Vec3(B.x, B.y, A.z), Vec3(B.x, A.y, B.z), Vec3(A.x, B.y, B.z), Vec3(B.x, B.y, B.z)};
-
-	F32 aabbMinDepth = 1.0f;
-	Vec2 minNdc = 1000.0f;
-	Vec2 maxNdc = -1000.0f;
-	[unroll] for(U32 i = 0; i < 8; ++i)
-	{
-		Vec4 p = mul(g_consts.m_viewProjectionMat, Vec4(aabbEdges[i], 1.0f));
-
-		p.xyz /= abs(p.w);
-
-		minNdc = min(minNdc, p.xy);
-		maxNdc = max(maxNdc, p.xy);
-		aabbMinDepth = min(aabbMinDepth, p.z);
-	}
+	Vec2 minNdc, maxNdc;
+	F32 aabbMinDepth;
+	projectAabb(bvolume.m_aabbMin, bvolume.m_aabbMax, g_consts.m_viewProjectionMat, minNdc, maxNdc, aabbMinDepth);
 
 
-	aabbMinDepth = saturate(aabbMinDepth);
 	if(any(minNdc > 1.0f) || any(maxNdc < -1.0f))
 	if(any(minNdc > 1.0f) || any(maxNdc < -1.0f))
 	{
 	{
 		// Outside of the screen
 		// Outside of the screen
@@ -162,7 +149,7 @@ struct DrawIndirectArgsWithPadding
 	}
 	}
 #	endif // HZB_TEST
 #	endif // HZB_TEST
 #else // DISTANCE_TEST == 1
 #else // DISTANCE_TEST == 1
-	if(!testSphereSphereCollision(bvolume.m_sphereCenter, bvolume.m_sphereRadius, g_consts.m_pointOfTest, g_consts.m_testRadius))
+	if(!testSphereSphereCollision(sphereCenter, sphereRadius, g_consts.m_pointOfTest, g_consts.m_testRadius))
 	{
 	{
 		return;
 		return;
 	}
 	}
@@ -170,7 +157,7 @@ struct DrawIndirectArgsWithPadding
 
 
 	// Compute the LOD
 	// Compute the LOD
 	//
 	//
-	const F32 distFromLodPoint = length(bvolume.m_sphereCenter - g_consts.m_lodReferencePoint) - bvolume.m_sphereRadius;
+	const F32 distFromLodPoint = length(sphereCenter - g_consts.m_lodReferencePoint) - sphereRadius;
 
 
 	U32 lod;
 	U32 lod;
 	if(distFromLodPoint < g_consts.m_maxLodDistances[0])
 	if(distFromLodPoint < g_consts.m_maxLodDistances[0])
@@ -188,8 +175,8 @@ struct DrawIndirectArgsWithPadding
 
 
 	// Add the drawcall
 	// Add the drawcall
 	//
 	//
-	const U32 renderStateBucket = bvolume.m_renderableIndexAndRenderStateBucket & ((1u << 12u) - 1u);
-	const U32 renderableIdx = bvolume.m_renderableIndexAndRenderStateBucket >> 12u;
+	const U32 renderStateBucket = bvolume.m_renderableIndex_20bit_renderStateBucket_12bit & ((1u << 12u) - 1u);
+	const U32 renderableIdx = bvolume.m_renderableIndex_20bit_renderStateBucket_12bit >> 12u;
 
 
 	const GpuSceneRenderable renderable = g_renderables[renderableIdx];
 	const GpuSceneRenderable renderable = g_renderables[renderableIdx];
 	const U32 meshLodOffset = renderable.m_meshLodsOffset + sizeof(GpuSceneMeshLod) * lod;
 	const U32 meshLodOffset = renderable.m_meshLodsOffset + sizeof(GpuSceneMeshLod) * lod;

+ 8 - 5
AnKi/Shaders/GpuVisibilityAccelerationStructures.ankiprog

@@ -8,7 +8,7 @@
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
-#include <AnKi/Shaders/CollisionFunctions.hlsl>
+#include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
 
 // Buffers that point to the GPU scene
 // Buffers that point to the GPU scene
 [[vk::binding(0)]] StructuredBuffer<GpuSceneRenderableBoundingVolume> g_renderableBoundingVolumes;
 [[vk::binding(0)]] StructuredBuffer<GpuSceneRenderableBoundingVolume> g_renderableBoundingVolumes;
@@ -37,10 +37,13 @@
 
 
 	// Sphere test
 	// Sphere test
 	GpuSceneRenderableBoundingVolume bvolume;
 	GpuSceneRenderableBoundingVolume bvolume;
+	Vec3 sphereCenter;
 	if(visible)
 	if(visible)
 	{
 	{
 		bvolume = g_renderableBoundingVolumes[bvolumeIdx];
 		bvolume = g_renderableBoundingVolumes[bvolumeIdx];
+
+		// The sphere center is derived from the AABB, so it must be computed only after the bounding volume has been fetched
+		sphereCenter = (bvolume.m_aabbMin + bvolume.m_aabbMax) * 0.5f;
-		visible = testSphereSphereCollision(bvolume.m_sphereCenter, bvolume.m_sphereRadius, g_consts.m_pointOfTest, g_consts.m_testRadius);
+		visible = testSphereSphereCollision(sphereCenter, bvolume.m_sphereRadius, g_consts.m_pointOfTest, g_consts.m_testRadius);
 	}
 	}
 
 
 	// All good, write the instance
 	// All good, write the instance
@@ -48,11 +51,11 @@
 	{
 	{
 		// LOD selection
 		// LOD selection
 		U32 lod;
 		U32 lod;
-		const Bool insideCameraFrustum = frustumTest(g_consts.m_clipPlanes, bvolume.m_sphereCenter, bvolume.m_sphereRadius);
+		const Bool insideCameraFrustum = frustumTest(g_consts.m_clipPlanes, sphereCenter, bvolume.m_sphereRadius);
 		if(insideCameraFrustum)
 		if(insideCameraFrustum)
 		{
 		{
 			// Visible by the camera, need to match the camera LODs
 			// Visible by the camera, need to match the camera LODs
-			const F32 distFromLodPoint = length(bvolume.m_sphereCenter - g_consts.m_pointOfTest) - bvolume.m_sphereRadius;
+			const F32 distFromLodPoint = length(sphereCenter - g_consts.m_pointOfTest) - bvolume.m_sphereRadius;
 			if(distFromLodPoint < g_consts.m_maxLodDistances[0])
 			if(distFromLodPoint < g_consts.m_maxLodDistances[0])
 			{
 			{
 				lod = 0u;
 				lod = 0u;
@@ -72,7 +75,7 @@
 			lod = 2u;
 			lod = 2u;
 		}
 		}
 
 
-		const U32 renderableIdx = bvolume.m_renderableIndexAndRenderStateBucket >> 12u;
+		const U32 renderableIdx = bvolume.m_renderableIndex_20bit_renderStateBucket_12bit >> 12u;
 		const GpuSceneRenderable renderable = g_renderables[renderableIdx];
 		const GpuSceneRenderable renderable = g_renderables[renderableIdx];
 
 
 		const U32 meshLodOffset = renderable.m_meshLodsOffset + sizeof(GpuSceneMeshLod) * lod;
 		const U32 meshLodOffset = renderable.m_meshLodsOffset + sizeof(GpuSceneMeshLod) * lod;

+ 1 - 1
AnKi/Shaders/GpuVisibilityNonRenderables.ankiprog

@@ -15,7 +15,7 @@
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Common.hlsl>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuSceneTypes.h>
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
 #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
-#include <AnKi/Shaders/CollisionFunctions.hlsl>
+#include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 
 
 #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 #if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
 typedef GpuSceneLight ObjectType;
 typedef GpuSceneLight ObjectType;

+ 8 - 7
AnKi/Shaders/Include/GpuSceneFunctions.h

@@ -22,21 +22,22 @@ inline GpuSceneRenderableVertex unpackGpuSceneRenderableVertex(UVec4 x)
 inline GpuSceneRenderableBoundingVolume initGpuSceneRenderableBoundingVolume(Vec3 aabbMin, Vec3 aabbMax, U32 renderableIndex, U32 renderStateBucket)
 inline GpuSceneRenderableBoundingVolume initGpuSceneRenderableBoundingVolume(Vec3 aabbMin, Vec3 aabbMax, U32 renderableIndex, U32 renderStateBucket)
 {
 {
 	GpuSceneRenderableBoundingVolume gpuVolume;
 	GpuSceneRenderableBoundingVolume gpuVolume;
+	gpuVolume.m_aabbMin = aabbMin; // The AABB corners are stored as given; the center is no longer stored
+	gpuVolume.m_aabbMax = aabbMax;
 
 
-	gpuVolume.m_sphereCenter = (aabbMin + aabbMax) * 0.5f;
-	gpuVolume.m_aabbExtend = aabbMax - gpuVolume.m_sphereCenter;
+	const Vec3 sphereCenter = (aabbMin + aabbMax) * 0.5f; // Midpoint of the box, used only to derive the radius below
+	const Vec3 aabbExtend = aabbMax - sphereCenter; // Vector from the box center to its max corner
 #if defined(__cplusplus)
 #if defined(__cplusplus)
-	gpuVolume.m_sphereRadius = gpuVolume.m_aabbExtend.getLength();
+	gpuVolume.m_sphereRadius = aabbExtend.getLength(); // Radius = distance from the box center to a corner (C++ path)
 #else
 #else
-	gpuVolume.m_sphereRadius = length(gpuVolume.m_aabbExtend);
+	gpuVolume.m_sphereRadius = length(aabbExtend); // Same radius computation for the non-C++ (shader) path
 #endif
 #endif
 
 
 	ANKI_ASSERT(renderableIndex <= (1u << 20u) - 1u);
 	ANKI_ASSERT(renderableIndex <= (1u << 20u) - 1u);
-	gpuVolume.m_renderableIndexAndRenderStateBucket = renderableIndex << 12u;
+	gpuVolume.m_renderableIndex_20bit_renderStateBucket_12bit = renderableIndex << 12u; // Renderable index goes into the high 20 bits
 
 
 	ANKI_ASSERT(renderStateBucket <= (1u << 12u) - 1u);
 	ANKI_ASSERT(renderStateBucket <= (1u << 12u) - 1u);
-	gpuVolume.m_renderableIndexAndRenderStateBucket |= renderStateBucket;
-
+	gpuVolume.m_renderableIndex_20bit_renderStateBucket_12bit |= renderStateBucket; // Render state bucket goes into the low 12 bits
 	return gpuVolume;
 	return gpuVolume;
 }
 }
 
 

+ 3 - 3
AnKi/Shaders/Include/GpuSceneTypes.h

@@ -48,11 +48,11 @@ static_assert(kMaxLodCount == 3);
 /// Used in visibility testing.
 /// Used in visibility testing.
 struct GpuSceneRenderableBoundingVolume
 struct GpuSceneRenderableBoundingVolume
 {
 {
-	Vec3 m_sphereCenter ANKI_CPP_CODE(= Vec3(kSomeFarDistance));
+	Vec3 m_aabbMin ANKI_CPP_CODE(= Vec3(kSomeFarDistance)); ///< Min corner of the AABB. The default places it at kSomeFarDistance.
 	F32 m_sphereRadius ANKI_CPP_CODE(= 0.0f);
 	F32 m_sphereRadius ANKI_CPP_CODE(= 0.0f);
 
 
-	Vec3 m_aabbExtend ANKI_CPP_CODE(= Vec3(0.0f));
-	U32 m_renderableIndexAndRenderStateBucket; ///< High 20bits point to a GpuSceneRenderable. Rest 12bits are the render state bucket idx.
+	Vec3 m_aabbMax ANKI_CPP_CODE(= Vec3(kSomeFarDistance)); ///< Max corner of the AABB. Same default as m_aabbMin, so a default volume is a zero-sized box.
+	U32 m_renderableIndex_20bit_renderStateBucket_12bit; ///< High 20bits point to a GpuSceneRenderable. Rest 12bits are the render state bucket idx.
 };
 };
 static_assert(sizeof(GpuSceneRenderableBoundingVolume) == sizeof(Vec4) * 2);
 static_assert(sizeof(GpuSceneRenderableBoundingVolume) == sizeof(Vec4) * 2);
 
 

+ 1 - 1
AnKi/Shaders/LightFunctions.hlsl

@@ -8,7 +8,7 @@
 #pragma once
 #pragma once
 
 
 #include <AnKi/Shaders/Functions.hlsl>
 #include <AnKi/Shaders/Functions.hlsl>
-#include <AnKi/Shaders/CollisionFunctions.hlsl>
+#include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/PackFunctions.hlsl>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>
 #include <AnKi/Shaders/Include/MiscRendererTypes.h>

+ 33 - 0
AnKi/Shaders/CollisionFunctions.hlsl → AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl

@@ -185,3 +185,36 @@ Bool frustumTest(Vec4 frustumPlanes[6], Vec3 sphereCenter, F32 sphereRadius)
 
 
 	return minPlaneDistance > -sphereRadius;
 	return minPlaneDistance > -sphereRadius;
 }
 }
+
+/// Transforms the 8 corners of an AABB by viewProjMat and outputs the min/max of their projected XY (minNdc, maxNdc) plus the smallest projected depth clamped to [0, 1] (aabbMinDepth). Modified version found in https://zeux.io/2023/01/12/approximate-projected-bounds
+void projectAabb(Vec3 aabbMin, Vec3 aabbMax, Mat4 viewProjMat, out Vec2 minNdc, out Vec2 maxNdc, out F32 aabbMinDepth)
+{
+	const Vec4 SX = mul(viewProjMat, Vec4(aabbMax.x - aabbMin.x, 0.0, 0.0, 0.0)); // Box edge along X transformed as a direction (w = 0)
+	const Vec4 SY = mul(viewProjMat, Vec4(0.0, aabbMax.y - aabbMin.y, 0.0, 0.0)); // Box edge along Y
+	const Vec4 SZ = mul(viewProjMat, Vec4(0.0, 0.0, aabbMax.z - aabbMin.z, 0.0)); // Box edge along Z
+	// Only the min corner is fully transformed; the other 7 corners are built by adding the transformed edges to it
+	Vec4 aabbEdgesClip[8u];
+	aabbEdgesClip[0] = mul(viewProjMat, Vec4(aabbMin.x, aabbMin.y, aabbMin.z, 1.0)); // Min corner
+	aabbEdgesClip[1] = aabbEdgesClip[0] + SZ; // +Z
+	aabbEdgesClip[2] = aabbEdgesClip[0] + SY; // +Y
+	aabbEdgesClip[3] = aabbEdgesClip[2] + SZ; // +Y +Z
+	aabbEdgesClip[4] = aabbEdgesClip[0] + SX; // +X
+	aabbEdgesClip[5] = aabbEdgesClip[4] + SZ; // +X +Z
+	aabbEdgesClip[6] = aabbEdgesClip[4] + SY; // +X +Y
+	aabbEdgesClip[7] = aabbEdgesClip[6] + SZ; // +X +Y +Z (max corner)
+	// Seed the outputs with values the first corner will always replace (depth additionally never goes above 1)
+	aabbMinDepth = 1.0f;
+	minNdc = 1000.0f;
+	maxNdc = -1000.0f;
+	[unroll] for(U32 i = 0; i < 8; ++i)
+	{
+		Vec4 p = aabbEdgesClip[i];
+		p.xyz /= abs(p.w); // Divides by |w| rather than w. NOTE(review): presumably to keep the sign of corners behind the camera; w == 0 is not guarded - confirm
+
+		minNdc = min(minNdc, p.xy);
+		maxNdc = max(maxNdc, p.xy);
+		aabbMinDepth = min(aabbMinDepth, p.z);
+	}
+
+	aabbMinDepth = saturate(aabbMinDepth); // Clamp the depth to [0, 1]; minNdc/maxNdc are left unclamped
+}