Browse Source

Add the base of the cluster bin shader

Panagiotis Christopoulos Charitos 4 years ago
parent
commit
26d3073e62

+ 0 - 1
AnKi/Collision/Functions.h

@@ -184,7 +184,6 @@ inline Bool testCollision(const Ray& a, const Cone& b)
 {
 {
 	return testCollision(b, a);
 	return testCollision(b, a);
 }
 }
-Bool testCollision(const Cone& a, const Ray& b);
 
 
 // Extra testCollision functions
 // Extra testCollision functions
 
 

+ 2 - 2
AnKi/Math/Mat.h

@@ -534,7 +534,7 @@ public:
 
 
 	/// @name Operators with other types
 	/// @name Operators with other types
 	/// @{
 	/// @{
-	ANKI_ENABLE_METHOD(!HAS_MAT3X4_SIMD)
+	ANKI_ENABLE_METHOD(!HAS_SIMD)
 	ColumnVec operator*(const RowVec& v) const
 	ColumnVec operator*(const RowVec& v) const
 	{
 	{
 		const TMat& m = *this;
 		const TMat& m = *this;
@@ -551,7 +551,7 @@ public:
 		return out;
 		return out;
 	}
 	}
 
 
-	ANKI_ENABLE_METHOD(HAS_MAT3X4_SIMD)
+	ANKI_ENABLE_METHOD(HAS_SIMD)
 	ColumnVec operator*(const RowVec& v) const
 	ColumnVec operator*(const RowVec& v) const
 	{
 	{
 		ColumnVec out;
 		ColumnVec out;

+ 1 - 1
AnKi/Renderer/Renderer.cpp

@@ -719,7 +719,7 @@ void Renderer::writeClustererBuffersTask(RenderingContext& ctx)
 			m_stagingMem->allocateFrame(sizeof(ClustererUniforms), StagingGpuMemoryType::UNIFORM,
 			m_stagingMem->allocateFrame(sizeof(ClustererUniforms), StagingGpuMemoryType::UNIFORM,
 										ctx.m_clustererGpuObjects.m_lightingUniformsToken));
 										ctx.m_clustererGpuObjects.m_lightingUniformsToken));
 
 
-		unis->m_rendereringSize = Vec2(F32(m_width), F32(m_height));
+		unis->m_renderingSize = Vec2(F32(m_width), F32(m_height));
 
 
 		unis->m_time = F32(HighRezTimer::getCurrentTime());
 		unis->m_time = F32(HighRezTimer::getCurrentTime());
 		unis->m_frame = m_frameCount & MAX_U32;
 		unis->m_frame = m_frameCount & MAX_U32;

+ 1 - 0
AnKi/ShaderCompiler/ShaderProgramParser.cpp

@@ -54,6 +54,7 @@ static const char* SHADER_HEADER = R"(#version 460 core
 #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float32 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
 #extension GL_EXT_shader_explicit_arithmetic_types_float64 : enable
+#extension GL_EXT_shader_atomic_int64 : enable
 
 
 #extension GL_EXT_nonuniform_qualifier : enable
 #extension GL_EXT_nonuniform_qualifier : enable
 #extension GL_EXT_scalar_block_layout : enable
 #extension GL_EXT_scalar_block_layout : enable

+ 141 - 0
AnKi/Shaders/ClusterBin.ankiprog

@@ -0,0 +1,141 @@
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
+// All rights reserved.
+// Code licensed under the BSD License.
+// http://www.anki3d.org/LICENSE
+
+ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0, 1u);
+ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1, 1u);
+ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2, 1u);
+ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 3, 1u);
+
+#pragma anki start comp
+
+#include <AnKi/Shaders/Include/ClustererTypes.h>
+#include <AnKi/Shaders/Common.glsl>
+#include <AnKi/Shaders/CollisionFunctions.glsl>
+
+const U32 WORKGROUP_SIZE = 64u; // Invocations per workgroup along X; each tile consumes SAMPLE_COUNT of them
+layout(local_size_x = WORKGROUP_SIZE) in;
+
+layout(set = 0, binding = 0) uniform b_unis // Input: clusterer uniforms (see ClustererTypes.h)
+{
+	ClustererUniforms u_unis;
+};
+
+layout(set = 0, binding = 1) writeonly buffer b_tiles // Output: main() writes tiles at [0, TILE_COUNT) and Z splits after them
+{
+	TileOrZSplit u_tilesAndZSplits[];
+};
+
+layout(set = 0, binding = 2) readonly buffer b_plights // Input: point lights, indexed in main() by the clusterer object index
+{
+	PointLight2 u_pointLights[];
+};
+
+const U32 TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y; // Total number of screen tiles
+
+// Per-tile sample pattern. main() adds SAMPLE_OFFSET to every location, which makes all of them non-negative.
+// NOTE(review): the values look like the D3D standard 4x MSAA sample positions -- confirm
+const U32 SAMPLE_COUNT = 4u;
+const I32 SAMPLE_OFFSET = 8;
+const IVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] =
+	IVec2[SAMPLE_COUNT](IVec2(-2, -6), IVec2(6, -2), IVec2(-6, 2), IVec2(2, 6));
+
+// One 64bit mask per tile covered by this workgroup. main() ORs into it the bit of the clusterer object being processed
+const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT;
+shared U64 s_tileMasks[TILES_PER_WORKGROUP];
+
+// One 64bit mask per Z split, accumulated by all the samples of this workgroup for the clusterer object being processed
+shared U64 s_zSplitMasks[Z_SPLIT_COUNT];
+
+/// Bins one clusterer object (only point lights for now) into the screen tiles and the Z splits.
+/// - gl_GlobalInvocationID.x selects the tile and the sample inside the tile (SAMPLE_COUNT invocations per tile)
+/// - gl_GlobalInvocationID.y selects the clusterer object
+/// Every sample casts a ray from the camera. If the ray hits the object then the object's bit is ORed into the mask of
+/// the tile and into the masks of the Z splits that the hit range covers.
+void main()
+{
+	const U32 tileIdx = gl_GlobalInvocationID.x / SAMPLE_COUNT;
+	const U32 localTileIdx = gl_LocalInvocationIndex / SAMPLE_COUNT;
+	const U32 sampleIdx = gl_GlobalInvocationID.x % SAMPLE_COUNT;
+	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+
+	// Don't exit early for the out-of-range tiles of the last workgroup. barrier() has to be executed in uniform
+	// control flow and these invocations are also needed to zero and to write their share of the Z splits
+	const Bool tileValid = tileIdx < TILE_COUNT;
+
+	// The Y coordinate is the same for the whole workgroup so this condition is workgroup-uniform
+	const Bool isPointLight = clustererObjectIdx < u_unis.m_pointLightCount;
+
+	const UVec2 tileXY = UVec2(tileIdx % TILE_COUNT_X, tileIdx / TILE_COUNT_X);
+
+	// This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
+	const UVec2 pixel = tileXY * TILE_SIZE + UVec2(SAMPLE_LOCATIONS[sampleIdx] + SAMPLE_OFFSET);
+
+	const Vec2 uv = Vec2(pixel) / Vec2(u_unis.m_renderingSize);
+	const Vec2 ndc = UV_TO_NDC(uv);
+
+	// Unproject the sample on the far plane to world space
+	const Vec4 farWorldPos4 = u_unis.m_matrices.m_invertedViewProjection * Vec4(ndc, 1.0, 1.0);
+	const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;
+
+	// Create the ray that will test the clusterer objects
+	const Vec3 rayOrigin = u_unis.m_cameraPosition;
+	const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
+
+	// Zero shared memory. The Z splits are strided over the workgroup so that all of them are covered even if
+	// Z_SPLIT_COUNT is not a multiple of WORKGROUP_SIZE
+	s_tileMasks[localTileIdx] = 0u;
+	for(U32 i = gl_LocalInvocationIndex; i < Z_SPLIT_COUNT; i += WORKGROUP_SIZE)
+	{
+		s_zSplitMasks[i] = 0u;
+	}
+	memoryBarrierShared();
+	barrier();
+
+	// Do collision
+	if(tileValid && isPointLight)
+	{
+		const U32 lightIdx = clustererObjectIdx;
+		const PointLight2 light = u_pointLights[lightIdx];
+
+		F32 t0, t1;
+		if(testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1))
+		{
+			// Shift a 64bit one. Shifting a 32bit literal would lose the lights with index >= 32
+			const U64 mask = U64(1u) << lightIdx;
+			atomicOr(s_tileMasks[localTileIdx], mask);
+
+			// Compute the Z splits. t0 and t1 are valid only because the ray did hit
+			const Vec3 hitpointA = rayDir * t0 + rayOrigin;
+			const Vec3 hitpointB = rayDir * t1 + rayOrigin;
+			const F32 distFromNearPlaneA =
+				testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointA);
+			const F32 distFromNearPlaneB =
+				testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointB);
+			const F32 minDistFromNearPlane = min(distFromNearPlaneA, distFromNearPlaneB);
+			const F32 maxDistFromNearPlane = max(distFromNearPlaneA, distFromNearPlaneB);
+
+			// distance/(far-near) is a [0, 1] fraction of the frustum, scale it to get a split index. Clamp while still
+			// in floating point because converting a negative float to unsigned is undefined
+			const F32 zSplitCountf = F32(Z_SPLIT_COUNT);
+			const F32 zSplitsPerUnit = zSplitCountf * u_unis.m_oneOverFrustumLength;
+			const U32 startZSplit = U32(clamp(minDistFromNearPlane * zSplitsPerUnit, 0.0, zSplitCountf));
+			const U32 endZSplit =
+				min(Z_SPLIT_COUNT - 1u, U32(clamp(maxDistFromNearPlane * zSplitsPerUnit, 0.0, zSplitCountf)));
+			for(U32 i = startZSplit; i <= endZSplit; ++i)
+			{
+				atomicOr(s_zSplitMasks[i], mask);
+			}
+		}
+	}
+
+	// Sync
+	memoryBarrierShared();
+	barrier();
+
+	// No more barriers from this point on, it's safe to exit
+	if(!isPointLight)
+	{
+		return;
+	}
+
+	// NOTE(review): b_tiles is declared writeonly but atomicOr also reads the memory -- confirm that this is accepted
+
+	// All invocations write their share of the Z splits
+	for(U32 i = gl_LocalInvocationIndex; i < Z_SPLIT_COUNT; i += WORKGROUP_SIZE)
+	{
+		atomicOr(u_tilesAndZSplits[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
+	}
+
+	// First sample writes the tile
+	if(tileValid && sampleIdx == 0u)
+	{
+		atomicOr(u_tilesAndZSplits[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
+	}
+}
+
+#pragma anki end

+ 20 - 2
AnKi/Shaders/CollisionFunctions.glsl

@@ -56,10 +56,28 @@ F32 testRayAabbInside(Vec3 rayOrigin, Vec3 rayDir, Vec3 aabbMin, Vec3 aabbMax)
 	return distToIntersect;
 	return distToIntersect;
 }
 }
 
 
-Bool testRaySphere(Vec3 rayOrigin, Vec3 rayDir, Vec3 sphereCenter, F32 sphereRadius)
+/// Intersect a ray with a sphere using the geometric solution.
+/// https://www.scratchapixel.com/lessons/3d-basic-rendering/minimal-ray-tracer-rendering-simple-shapes/ray-sphere-intersection
+/// @param rayDir Expected to be normalized, tca and d2 are computed under that assumption.
+/// @param[out] t0 Distance along rayDir of the first intersection. Zero if there is no intersection.
+/// @param[out] t1 Distance along rayDir of the second intersection. Zero if there is no intersection.
+/// @return True if the line of the ray crosses the sphere. The signs of t0 and t1 are not checked so one or both may be
+///         negative when the sphere is (partially) behind the origin.
+Bool testRaySphere(Vec3 rayOrigin, Vec3 rayDir, Vec3 sphereCenter, F32 sphereRadius, out F32 t0, out F32 t1)
 {
 {
 	const Vec3 L = sphereCenter - rayOrigin;
 	const Vec3 L = sphereCenter - rayOrigin;
 	const F32 tca = dot(L, rayDir);
 	const F32 tca = dot(L, rayDir);
 	const F32 d2 = dot(L, L) - tca * tca;
 	const F32 d2 = dot(L, L) - tca * tca;
-	return d2 <= radius * radius;
+	const F32 radius2 = sphereRadius * sphereRadius;
+	const F32 diff = radius2 - d2;
+	if(diff < 0.0)
+	{
+		// An out parameter that is never written is undefined for the caller, give both a defined value
+		t0 = 0.0;
+		t1 = 0.0;
+		return false;
+	}
+	else
+	{
+		const F32 thc = sqrt(diff);
+		t0 = tca - thc;
+		t1 = tca + thc;
+		return true;
+	}
+}
+
+/// Evaluate the plane equation for a point.
+/// @return dot(planeNormal, point) - planeOffset. For a unit length planeNormal that is the signed distance of the point
+///         from the plane, positive on the side the normal points to.
+F32 testPlanePoint(Vec3 planeNormal, F32 planeOffset, Vec3 point)
+{
+	const F32 projectionOnNormal = dot(planeNormal, point);
+	return projectionOnNormal - planeOffset;
 }
 }

+ 27 - 10
AnKi/Shaders/Include/ClustererTypes.h

@@ -9,7 +9,15 @@
 
 
 ANKI_BEGIN_NAMESPACE
 ANKI_BEGIN_NAMESPACE
 
 
+// Enum of clusterer object types
+const U32 CLUSTER_OBJECT_TYPE_POINT_LIGHT = 0u;
+const U32 CLUSTER_OBJECT_TYPE_SPOT_LIGHT = 1u;
+const U32 CLUSTER_OBJECT_TYPE_DECAL = 2u;
+const U32 CLUSTER_OBJECT_TYPE_FOG_DENSITY_VOLUME = 3u;
+const U32 CLUSTER_OBJECT_TYPE_REFLECTION_PROBE = 4u;
+const U32 CLUSTER_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE = 5u;
 const U32 CLUSTER_OBJECT_TYPE_COUNT2 = 6u; ///< Point and spot lights, refl and GI probes, decals and fog volumes.
 const U32 CLUSTER_OBJECT_TYPE_COUNT2 = 6u; ///< Point and spot lights, refl and GI probes, decals and fog volumes.
+
 const F32 CLUSTER_OBJECT_FRUSTUM_NEAR_PLANE = 0.1f / 4.0f; ///< The near plane of various clusterer object frustums.
 const F32 CLUSTER_OBJECT_FRUSTUM_NEAR_PLANE = 0.1f / 4.0f; ///< The near plane of various clusterer object frustums.
 const U32 MAX_SHADOW_CASCADES2 = 4u;
 const U32 MAX_SHADOW_CASCADES2 = 4u;
 
 
@@ -117,9 +125,8 @@ const U32 _ANKI_SIZEOF_GlobalIlluminationProbe2 = 9u * ANKI_SIZEOF(U32);
 ANKI_SHADER_STATIC_ASSERT(sizeof(GlobalIlluminationProbe2) == _ANKI_SIZEOF_GlobalIlluminationProbe2);
 ANKI_SHADER_STATIC_ASSERT(sizeof(GlobalIlluminationProbe2) == _ANKI_SIZEOF_GlobalIlluminationProbe2);
 
 
 /// Common matrices.
 /// Common matrices.
-class CommonMatrices
+struct CommonMatrices
 {
 {
-public:
 	Mat4 m_cameraTransform ANKI_CPP_CODE(= Mat4::getIdentity());
 	Mat4 m_cameraTransform ANKI_CPP_CODE(= Mat4::getIdentity());
 	Mat4 m_view ANKI_CPP_CODE(= Mat4::getIdentity());
 	Mat4 m_view ANKI_CPP_CODE(= Mat4::getIdentity());
 	Mat4 m_projection ANKI_CPP_CODE(= Mat4::getIdentity());
 	Mat4 m_projection ANKI_CPP_CODE(= Mat4::getIdentity());
@@ -130,29 +137,39 @@ public:
 	Mat4 m_viewProjectionJitter ANKI_CPP_CODE(= Mat4::getIdentity());
 	Mat4 m_viewProjectionJitter ANKI_CPP_CODE(= Mat4::getIdentity());
 
 
 	Mat4 m_invertedViewProjectionJitter ANKI_CPP_CODE(= Mat4::getIdentity()); ///< To unproject in world space.
 	Mat4 m_invertedViewProjectionJitter ANKI_CPP_CODE(= Mat4::getIdentity()); ///< To unproject in world space.
+	Mat4 m_invertedViewProjection ANKI_CPP_CODE(= Mat4::getIdentity());
 
 
-	Vec4 m_unprojectionParameters ANKI_CPP_CODE(= Vec4(0.0f)); ///< To unproject. Jitter is not considered.
+	Vec4 m_unprojectionParameters ANKI_CPP_CODE(= Vec4(0.0f)); ///< To unproject to view space. Jitter not considered.
 };
 };
-const U32 _ANKI_SIZEOF_CommonMatrices = 8u * ANKI_SIZEOF(Mat4) + ANKI_SIZEOF(Vec4);
+const U32 _ANKI_SIZEOF_CommonMatrices = 9u * ANKI_SIZEOF(Mat4) + ANKI_SIZEOF(Vec4);
 ANKI_SHADER_STATIC_ASSERT(sizeof(CommonMatrices) == _ANKI_SIZEOF_CommonMatrices);
 ANKI_SHADER_STATIC_ASSERT(sizeof(CommonMatrices) == _ANKI_SIZEOF_CommonMatrices);
 
 
 /// Common uniforms for light shading passes.
 /// Common uniforms for light shading passes.
 struct ClustererUniforms
 struct ClustererUniforms
 {
 {
-	Vec2 m_rendereringSize;
+	Vec2 m_renderingSize;
 
 
 	F32 m_time;
 	F32 m_time;
 	U32 m_frame;
 	U32 m_frame;
 
 
+	Vec4 m_nearPlaneWSpace;
 	F32 m_near;
 	F32 m_near;
 	F32 m_far;
 	F32 m_far;
+	F32 m_oneOverFrustumLength; ///< 1/(far-near)
 	Vec3 m_cameraPosition;
 	Vec3 m_cameraPosition;
 
 
 	UVec2 m_tileCounts;
 	UVec2 m_tileCounts;
 	U32 m_zSplitCount;
 	U32 m_zSplitCount;
 	U32 m_lightVolumeLastCluster;
 	U32 m_lightVolumeLastCluster;
 
 
-	Vec2 m_padding;
+	U32 m_pointLightCount;
+	U32 m_spotLightCount;
+	U32 m_decalCount;
+	U32 m_fogDensityVolumeCount;
+	U32 m_reflectionProbeCount;
+	U32 m_giProbeCount;
+
+	F32 m_padding[3u];
 
 
 	CommonMatrices m_matrices;
 	CommonMatrices m_matrices;
 	CommonMatrices m_previousMatrices;
 	CommonMatrices m_previousMatrices;
@@ -160,11 +177,11 @@ struct ClustererUniforms
 	DirectionalLight2 m_directionalLight;
 	DirectionalLight2 m_directionalLight;
 };
 };
 const U32 _ANKI_SIZEOF_ClustererUniforms =
 const U32 _ANKI_SIZEOF_ClustererUniforms =
-	16u * ANKI_SIZEOF(U32) + 2u * ANKI_SIZEOF(CommonMatrices) + ANKI_SIZEOF(DirectionalLight2);
+	28u * ANKI_SIZEOF(U32) + 2u * ANKI_SIZEOF(CommonMatrices) + ANKI_SIZEOF(DirectionalLight2);
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClustererUniforms) == _ANKI_SIZEOF_ClustererUniforms);
 ANKI_SHADER_STATIC_ASSERT(sizeof(ClustererUniforms) == _ANKI_SIZEOF_ClustererUniforms);
 
 
 /// Information that a tile or a Z-split will contain.
 /// Information that a tile or a Z-split will contain.
-struct Tile
+struct TileOrZSplit
 {
 {
 	U64 m_pointLightsMask;
 	U64 m_pointLightsMask;
 	U64 m_spotLightsMask;
 	U64 m_spotLightsMask;
@@ -174,7 +191,7 @@ struct Tile
 	U32 m_giProbesMask;
 	U32 m_giProbesMask;
 	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
 	U32 m_padding; ///< Add some padding to be 100% sure nothing will break.
 };
 };
-const U32 _ANKI_SIZEOF_Tile = 5u * ANKI_SIZEOF(U64);
-ANKI_SHADER_STATIC_ASSERT(sizeof(Tile) == _ANKI_SIZEOF_Tile);
+const U32 _ANKI_SIZEOF_TileOrZSplit = 5u * ANKI_SIZEOF(U64);
+ANKI_SHADER_STATIC_ASSERT(sizeof(TileOrZSplit) == _ANKI_SIZEOF_TileOrZSplit);
 
 
 ANKI_END_NAMESPACE
 ANKI_END_NAMESPACE