|
|
@@ -0,0 +1,141 @@
|
|
|
+// Copyright (C) 2009-2021, Panagiotis Christopoulos Charitos and contributors.
|
|
|
+// All rights reserved.
|
|
|
+// Code licensed under the BSD License.
|
|
|
+// http://www.anki3d.org/LICENSE
|
|
|
+
|
|
|
+ANKI_SPECIALIZATION_CONSTANT_U32(TILE_SIZE, 0, 1u); // Multiplied with a tile's XY coordinate in main() to get its pixel position. NOTE(review): the macro arguments are presumably (name, constant ID, default value) - confirm against the macro's definition
|
|
|
+ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_X, 1, 1u); // Tiles along X; main() uses it to turn a linear tile index into a tile XY coordinate
|
|
|
+ANKI_SPECIALIZATION_CONSTANT_U32(TILE_COUNT_Y, 2, 1u); // Multiplied with TILE_COUNT_X to form TILE_COUNT
|
|
|
+ANKI_SPECIALIZATION_CONSTANT_U32(Z_SPLIT_COUNT, 3, 1u); // Number of Z splits; sizes the s_zSplitMasks shared array
|
|
|
+
|
|
|
+#pragma anki start comp
|
|
|
+
|
|
|
+#include <AnKi/Shaders/Include/ClustererTypes.h>
|
|
|
+#include <AnKi/Shaders/Common.glsl>
|
|
|
+#include <AnKi/Shaders/CollisionFunctions.glsl>
|
|
|
+
|
|
|
+const U32 WORKGROUP_SIZE = 64u; // Invocations per workgroup; every SAMPLE_COUNT consecutive invocations work on the same tile
|
|
|
+layout(local_size_x = WORKGROUP_SIZE) in; // 1D workgroup: only local_size_x is specified
|
|
|
+
|
|
|
+layout(set = 0, binding = 0) uniform b_unis // Parameters read by main(): m_renderingSize, m_matrices.m_invertedViewProjection, m_cameraPosition, m_nearPlaneWSpace, m_oneOverFrustumLength and m_pointLightCount
|
|
|
+{
|
|
|
+ ClustererUniforms u_unis;
|
|
|
+};
|
|
|
+
|
|
|
+layout(set = 0, binding = 1) writeonly buffer b_tiles // Output masks. NOTE(review): declared writeonly but main() updates it with atomicOr() - confirm the qualifier is intended
|
|
|
+{
|
|
|
+ TileOrZSplit u_tilesAndZSplits[]; // main() indexes tiles with tileIdx and Z splits with TILE_COUNT + split index
|
|
|
+};
|
|
|
+
|
|
|
+layout(set = 0, binding = 2) readonly buffer b_plights // Point lights tested against the sample rays in main()
|
|
|
+{
|
|
|
+ PointLight2 u_pointLights[]; // Indexed with gl_GlobalInvocationID.y; main() reads m_position and m_radius
|
|
|
+};
|
|
|
+
|
|
|
+const U32 TILE_COUNT = TILE_COUNT_X * TILE_COUNT_Y; // Total number of tiles; also the index of the first Z split entry in u_tilesAndZSplits
|
|
|
+
|
|
|
+// DX sample locations: the offsets of the SAMPLE_COUNT rays that main() casts for every tile
|
|
|
+const U32 SAMPLE_COUNT = 4u; // Rays (and invocations) per tile
|
|
|
+const I32 SAMPLE_OFFSET = 8; // Added to every SAMPLE_LOCATIONS entry in main(), which makes all offsets non-negative before the UVec2 conversion
|
|
|
+const IVec2 SAMPLE_LOCATIONS[SAMPLE_COUNT] =
|
|
|
+ IVec2[SAMPLE_COUNT](IVec2(-2, -6), IVec2(6, -2), IVec2(-6, 2), IVec2(2, 6)); // NOTE(review): presumably the D3D standard 4x MSAA pattern - confirm
|
|
|
+
|
|
|
+// A mask per tile of this workgroup for the clusterer object being processed by this workgroup. The samples of a tile OR their collision result into it with atomicOr()
|
|
|
+const U32 TILES_PER_WORKGROUP = WORKGROUP_SIZE / SAMPLE_COUNT; // SAMPLE_COUNT invocations cooperate on each tile
|
|
|
+shared U64 s_tileMasks[TILES_PER_WORKGROUP]; // Indexed with gl_LocalInvocationIndex / SAMPLE_COUNT
|
|
|
+
|
|
|
+// A mask for each Z split for a specific clusterer object. Accumulated in shared memory and then ORed into u_tilesAndZSplits starting at index TILE_COUNT
|
|
|
+shared U64 s_zSplitMasks[Z_SPLIT_COUNT];
|
|
|
+
|
|
|
+// Bins one clusterer object (currently only point lights) into screen tiles and Z splits.
+//
+// Invocation mapping, as implied by the indexing below:
+// - gl_GlobalInvocationID.x selects a (tile, sample) pair; SAMPLE_COUNT consecutive invocations share one tile.
+// - gl_GlobalInvocationID.y selects the clusterer object. The workgroup is 1D, so all invocations of a workgroup
+//   process the same object.
+//
+// Every sample casts a ray from the camera through its pixel. If the ray hits the object, the object's bit is ORed
+// into the shared mask of the tile and into the shared masks of the Z splits the hit interval covers. After a barrier
+// the shared masks are ORed into u_tilesAndZSplits: tiles live at [0, TILE_COUNT) and Z splits start at TILE_COUNT.
+void main()
+{
+	const U32 tileIdx = gl_GlobalInvocationID.x / SAMPLE_COUNT;
+	const U32 localTileIdx = gl_LocalInvocationIndex / SAMPLE_COUNT;
+	const U32 sampleIdx = gl_GlobalInvocationID.x % SAMPLE_COUNT;
+	const U32 clustererObjectIdx = gl_GlobalInvocationID.y;
+
+	// Invocations past the last tile must NOT return here. barrier() has to be reached by every invocation of the
+	// workgroup, and these invocations still have to zero and write out their share of the Z split masks
+	const Bool tileInRange = tileIdx < TILE_COUNT;
+
+	// The object index is the same for the whole workgroup so this condition is workgroup-uniform
+	const Bool isPointLight = clustererObjectIdx < u_unis.m_pointLightCount;
+
+	// Zero shared memory. The strided loop covers every Z split for any Z_SPLIT_COUNT, including counts that are not
+	// a multiple of WORKGROUP_SIZE
+	s_tileMasks[localTileIdx] = U64(0u);
+	for(U32 i = gl_LocalInvocationIndex; i < Z_SPLIT_COUNT; i += WORKGROUP_SIZE)
+	{
+		s_zSplitMasks[i] = U64(0u);
+	}
+	memoryBarrierShared();
+	barrier();
+
+	if(tileInRange && isPointLight)
+	{
+		const UVec2 tileXY = UVec2(tileIdx % TILE_COUNT_X, tileIdx / TILE_COUNT_X);
+
+		// This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
+		const UVec2 pixel = tileXY * TILE_SIZE + UVec2(SAMPLE_LOCATIONS[sampleIdx] + SAMPLE_OFFSET);
+
+		const Vec2 uv = Vec2(pixel) / Vec2(u_unis.m_renderingSize);
+		const Vec2 ndc = UV_TO_NDC(uv);
+
+		// Unproject the sample to world space
+		const Vec4 farWorldPos4 = u_unis.m_matrices.m_invertedViewProjection * Vec4(ndc, 1.0, 1.0);
+		const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;
+
+		// Create the ray that will test the clusterer objects
+		const Vec3 rayOrigin = u_unis.m_cameraPosition;
+		const Vec3 rayDir = normalize(farWorldPos - rayOrigin);
+
+		// Do collision
+		const U32 lightIdx = clustererObjectIdx;
+		const PointLight2 light = u_pointLights[lightIdx];
+		F32 t0 = 0.0;
+		F32 t1 = 0.0;
+		if(testRaySphere(rayOrigin, rayDir, light.m_position, light.m_radius, t0, t1))
+		{
+			// The shift has to happen in 64 bits, a 32bit "1u" can't reach bits 32 to 63.
+			// NOTE(review): lightIdx >= 64 doesn't fit in the mask - confirm m_pointLightCount never exceeds 64
+			const U64 mask = U64(1u) << U64(lightIdx);
+			atomicOr(s_tileMasks[localTileIdx], mask);
+
+			// Compute the Z splits. t0 and t1 are only meaningful when the ray hits, that's why this is inside the
+			// collision branch
+			const Vec3 hitpointA = rayDir * t0 + rayOrigin;
+			const Vec3 hitpointB = rayDir * t1 + rayOrigin;
+			const F32 distFromNearPlaneA =
+				testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointA);
+			const F32 distFromNearPlaneB =
+				testPlanePoint(u_unis.m_nearPlaneWSpace.xyz, u_unis.m_nearPlaneWSpace.w, hitpointB);
+			const F32 minDistFromNearPlane = min(distFromNearPlaneA, distFromNearPlaneB);
+			const F32 maxDistFromNearPlane = max(distFromNearPlaneA, distFromNearPlaneB);
+
+			// Clamp in floating point before converting. Converting a negative or a too large float to U32 is
+			// undefined and max(0u, ...) on an unsigned value does nothing.
+			// NOTE(review): the distance times m_oneOverFrustumLength is used directly as a split index. If that
+			// member is 1/frustumLength, as its name suggests, a multiplication with Z_SPLIT_COUNT is missing -
+			// confirm against the code that fills ClustererUniforms
+			const F32 maxSplit = F32(Z_SPLIT_COUNT);
+			const U32 startZSplit = U32(clamp(minDistFromNearPlane * u_unis.m_oneOverFrustumLength, 0.0, maxSplit));
+			const U32 endZSplit =
+				min(Z_SPLIT_COUNT - 1u,
+					U32(clamp(maxDistFromNearPlane * u_unis.m_oneOverFrustumLength, 0.0, maxSplit)));
+			for(U32 i = startZSplit; i <= endZSplit; ++i)
+			{
+				atomicOr(s_zSplitMasks[i], mask);
+			}
+		}
+	}
+
+	// Sync
+	memoryBarrierShared();
+	barrier();
+
+	// No more barriers from this point on, it's safe to exit
+	if(!isPointLight)
+	{
+		return;
+	}
+
+	// All invocations write Z splits, using the same striding as the zeroing above
+	for(U32 i = gl_LocalInvocationIndex; i < Z_SPLIT_COUNT; i += WORKGROUP_SIZE)
+	{
+		atomicOr(u_tilesAndZSplits[TILE_COUNT + i].m_pointLightsMask, s_zSplitMasks[i]);
+	}
+
+	// First sample writes the tile
+	if(tileInRange && sampleIdx == 0u)
+	{
+		atomicOr(u_tilesAndZSplits[tileIdx].m_pointLightsMask, s_tileMasks[localTileIdx]);
+	}
+}
|
|
|
+
|
|
|
+#pragma anki end
|