123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630 |
- /*
- * Copyright (c) Contributors to the Open 3D Engine Project.
- * For complete copyright and license terms please see the LICENSE at the root of this distribution.
- *
- * SPDX-License-Identifier: Apache-2.0 OR MIT
- *
- */
- #include <scenesrg_all.srgi>
- #include <viewsrg_all.srgi>
- // Perform light culling on a compute shader
- #include <Atom/RPI/Math.azsli>
- #include <Atom/Features/LightCulling/LightCullingShared.azsli>
- #include <Atom/Features/PBR/Lights/LightStructures.azsli>
- #include <Atom/Features/Decals/DecalStructures.azsli>
// Per-pass resources consumed and produced by the light culling compute shader.
ShaderResourceGroup PassSrg : SRG_PerPass
{
    // Tile-grid constants. The m_padding* members are not read by this shader.
    struct LightCullingConstants
    {
        float2 m_gridPixel;     // Added to a tile's min UV to obtain its max UV (see BuildAabb)
        float2 m_gridHalfPixel; // Added to a tile's min UV to obtain its center UV (see BuildAabb)
        uint   m_gridWidth;     // Forwarded to GetLightListIndex when addressing m_lightList
        uint   m_padding0;
        uint   m_padding1;
        uint   m_padding2;
    };
    LightCullingConstants m_constantData;

    // Source light data (inputs)
    StructuredBuffer<SimplePointLight> m_simplePointLights;
    StructuredBuffer<SimpleSpotLight>  m_simpleSpotLights;
    StructuredBuffer<PointLight>       m_pointLights;
    StructuredBuffer<DiskLight>        m_diskLights;
    StructuredBuffer<CapsuleLight>     m_capsuleLights;
    StructuredBuffer<QuadLight>        m_quadLights;

    // Number of valid entries in each of the source light buffers above
    uint m_simplePointLightCount;
    uint m_simpleSpotLightCount;
    uint m_pointLightCount;
    uint m_diskLightCount;
    uint m_capsuleLightCount;
    uint m_quadLightCount;

    // Produced by the LightCullingTilePrepare pass.
    // Contains depth min/max and mask data (a bit set for each location where opaque geo was found)
    Texture2D<uint4> m_tileLightData;

    // Destination light data (outputs)
    RWStructuredBuffer<uint> m_lightList;  // Packed per-tile entries, one "end of group" marker after each culled type
    RWTexture2D<uint>        m_lightCount; // Per-tile total written by MainCS

    // Source decal data and the number of valid entries in it
    StructuredBuffer<Decal> m_decals;
    uint m_decalCount;
}
// Number of entries appended to shared_lightIndices during the current culling phase.
// It is incremented atomically and can grow past the array size; readers clamp it.
groupshared uint shared_lightCount;

// Thread-group scratch list of packed light/decal entries, one slot per thread of the tile.
groupshared uint shared_lightIndices[TILE_DIM_X * TILE_DIM_Y];
// Returns true when the z component of dir, scaled by RH_COORD_SYSTEM_REVERSE, is negative.
bool IsVectorPointingTowardsEye(const float3 dir)
{
    const float signedZ = dir.z * RH_COORD_SYSTEM_REVERSE;
    return signedZ < 0;
}
// Transforms a position by the view matrix (w = 1, so translation is applied).
float3 WorldToView_Point(float3 p)
{
    return mul(ViewSrg::m_viewMatrix, float4(p, 1.0)).xyz;
}
// Transforms a direction by the upper-left 3x3 of the view matrix (no translation).
float3 WorldToView_Vector(float3 v)
{
    return mul((float3x3)ViewSrg::m_viewMatrix, v);
}
// Sphere vs. AABB overlap test for callers that hold 1 / radius^2 instead of radius^2.
// aabbHalfSize is the half-dimension of the box about aabbCenter.
bool TestSphereVsAabbInvSqrt(float3 sphereCenter, float invSphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
{
    // Per-axis gap between the sphere center and the box; zero on axes where the center is within the box slab.
    const float3 gap = max(float3(0.0, 0.0, 0.0), abs(aabbCenter - sphereCenter) - aabbHalfSize);
    const float gapSq = dot(gap, gap);

    // Equivalent to gapSq < radius^2, written without inverting invSphereRadiusSq.
    return gapSq * invSphereRadiusSq < 1.0f;
}
// Sphere vs. AABB overlap test. sphereRadiusSq is the squared sphere radius and
// aabbHalfSize is the half-dimension of the box about aabbCenter.
bool TestSphereVsAabb(float3 sphereCenter, float sphereRadiusSq, float3 aabbCenter, float3 aabbHalfSize)
{
    // Per-axis gap between the sphere center and the box; zero on axes where the center is within the box slab.
    const float3 gap = max(float3(0.0, 0.0, 0.0), abs(aabbCenter - sphereCenter) - aabbHalfSize);
    const float gapSq = dot(gap, gap);
    return gapSq < sphereRadiusSq;
}
// Conservative sphere vs. cone test. It is simplified for speed, so it is not precise and will report false positives.
// Function origin and description: https://bartwronski.com/2017/04/13/cull-that-cone/
//   spherePos / sphereRadius : sphere to test
//   origin / forward         : cone apex and axis direction
//   cosa                     : cosine of the cone angle used by the test
//   size                     : cone length along 'forward'
bool TestSphereVsCone(float3 spherePos, float sphereRadius, float3 origin, float3 forward, float cosa, float size)
{
    const float3 toSphere = spherePos - origin;
    const float axialDistance = dot(toSphere, forward);
    const float distanceSq = dot(toSphere, toSphere);

    // 1 / sin(a). Both sides of the angle comparison below are pre-multiplied by this factor.
    const float invSinA = rsqrt(1 - cosa * cosa);

    // max() guards the sqrt against a slightly negative argument caused by rounding.
    const float closestPointDistance = invSinA * cosa * sqrt(max(0.0, distanceSq - axialDistance * axialDistance)) - axialDistance;

    const bool withinAngle = closestPointDistance <= sphereRadius * invSinA;
    const bool notBehindApex = axialDistance >= -sphereRadius;
    const bool notPastEnd = axialDistance <= sphereRadius + size;
    return withinAngle && notBehindApex && notPastEnd;
}
// Rounds x up to a power of two: for x >= 2 this yields the smallest power of two that is >= x.
// For x of 0 or 1 the value handed to firstbithigh is 0; see the article below for how that case behaves.
// https://wickedengine.net/2018/01/05/next-power-of-two-in-hlsl/
uint NextPowerTwo(uint x)
{
    const uint highestBit = firstbithigh(max(1, x) - 1);
    return 2u << highestBit;
}
// Extracts the sort key stored in the low 8 bits of the decal's packed sort-key field.
uint GetSortKey(Decal decal)
{
    const uint sortKeyMask = 0xFF;
    return (decal.m_sortKeyPacked & sortKeyMask);
}
// Returns true when the left entry must be placed after the right one.
// Entries are ordered by ascending decal sort key; equal keys fall back to ascending decal index.
// Both arguments are packed entries as stored in shared_lightIndices.
bool AreDecalsOutofOrder(uint packedIndexLeft, uint packedIndexRight)
{
    const uint lhsIndex = Light_GetIndex(packedIndexLeft);
    const uint rhsIndex = Light_GetIndex(packedIndexRight);

    const uint lhsKey = GetSortKey(PassSrg::m_decals[lhsIndex]);
    const uint rhsKey = GetSortKey(PassSrg::m_decals[rhsIndex]);

    return (lhsKey != rhsKey) ? (lhsKey > rhsKey) : (lhsIndex > rhsIndex);
}
// Sorts the decal entries currently held in shared_lightIndices using a bitonic sort executed by the whole thread group.
// Every thread of the group must call this: it contains group barriers.
void SortDecals(uint groupIndex)
{
    // shared_lightCount can exceed the array size when too many decals intersect the tile, so clamp it.
    const uint elementCount = min(TILE_DIM_X * TILE_DIM_Y, shared_lightCount);
    const uint paddedCount = NextPowerTwo(elementCount);

    // Bitonic sort code from AMD: https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11
    // AMD / MIT License is contained in the same directory as this file
    // blockSize = 2, 4, 8, 16, ...
    for (uint blockSize = 2; blockSize <= paddedCount; blockSize *= 2)
    {
        const uint firstStride = blockSize >> 1;

        // stride = blockSize / 2, blockSize / 4, ..., 4, 2, 1
        for (uint stride = firstStride; stride > 0; stride >>= 1)
        {
            // Each thread derives the pair of array slots it compares in this step.
            // The resulting pattern matches https://en.wikipedia.org/wiki/Bitonic_sorter#Alternative_representation :
            // larger and larger monotonic sequences are built and merged (two monotonic sequences form a bitonic one).
            const uint lowBits = groupIndex & (stride - 1);
            const uint highBits = 2 * (groupIndex - lowBits);
            const uint lhsSlot = highBits + lowBits;

            uint rhsSlot;
            if (stride == firstStride)
            {
                // First step of a block: compare against the mirrored slot.
                rhsSlot = highBits + (2 * stride - 1) - lowBits;
            }
            else
            {
                rhsSlot = highBits + stride + lowBits;
            }

            if (lhsSlot < elementCount && rhsSlot < elementCount)
            {
                const uint lhsPacked = shared_lightIndices[lhsSlot];
                const uint rhsPacked = shared_lightIndices[rhsSlot];
                if (AreDecalsOutofOrder(lhsPacked, rhsPacked))
                {
                    shared_lightIndices[lhsSlot] = rhsPacked;
                    shared_lightIndices[rhsSlot] = lhsPacked;
                }
            }

            // All swaps of this step must land before any thread starts the next step.
            GroupMemoryBarrierWithGroupSync();
        }
    }
}
// Appends a light/decal that passed culling to the thread-group list in shared memory.
//   lightIndex : index of the light/decal in its source buffer
//   inside     : bin mask produced by IsObjectInsideTile, packed together with the index
// shared_lightCount keeps counting past the capacity so later stages can see how many entries
// were requested; overflowing entries all land in the last valid slot.
void MarkLightAsVisibleInSharedMemory(uint lightIndex, uint inside)
{
    uint sharedLightIndex;
    InterlockedAdd(shared_lightCount, 1, sharedLightIndex);

    // shared_lightIndices holds TILE_DIM_X * TILE_DIM_Y entries, so the slot has to be clamped against
    // that size as well as the per-bin light limit. Clamping only against the per-bin limit would write
    // out of bounds if that limit were ever larger than the shared array.
    const uint sharedCapacity = min(TILE_DIM_X * TILE_DIM_Y, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN);
    sharedLightIndex = min(sharedLightIndex, sharedCapacity - 1);

    shared_lightIndices[sharedLightIndex] = PackLightIndexWithBinMask(lightIndex, inside);
}
-
// Copies this thread's entry (if any) from the shared list into the tile's region of PassSrg::m_lightList.
//   lightCount : number of entries already written for this tile by earlier phases
//   groupIndex : flattened thread index inside the group; selects which shared entry to copy
//   groupID    : tile coordinate, used to locate the tile's region in the output list
void CopySharedLightsToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
{
    const bool hasEntry = groupIndex < shared_lightCount;
    if (hasEntry)
    {
        // The destination slot never goes beyond the last slot of the tile's bin.
        const uint slot = min(lightCount + groupIndex, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
        const uint destination = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, slot);
        PassSrg::m_lightList[destination] = shared_lightIndices[groupIndex];
    }
}
// Return the minz and maxz of this light in view space.
// lightPosition is expected in view space; the result is (z - r, z + r) with the offsets scaled by RH_COORD_SYSTEM_REVERSE.
float2 ComputePointLightMinMaxZ(float lightRadius, float3 lightPosition)
{
    const float2 offsets = lightRadius * float2(-1,1) * RH_COORD_SYSTEM_REVERSE;
    return lightPosition.z + offsets;
}
// Return the minz and maxz of this simple spot light in view space.
// The cone is ignored: the range is that of a sphere with the light's attenuation radius around lightPosition.
float2 ComputeSimpleSpotLightMinMax(SimpleSpotLight light, float3 lightPosition)
{
    const float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);
    return ComputePointLightMinMaxZ(attenuationRadius, lightPosition);
}
// Return the minz and maxz of this quad light in view space
// Quad light must be double sided
// The range is that of a sphere with the light's attenuation radius around lightPosition.
float2 ComputeQuadLightMinMaxZ_DoubleSided(QuadLight light, float3 lightPosition)
{
    const float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);
    return ComputePointLightMinMaxZ(attenuationRadius, lightPosition);
}
// Return the minz and maxz of this quad light in view space
// Quad light must be single sided
// lightDirection is currently unused: the double sided bounds are returned as a conservative result.
float2 ComputeQuadLightMinMaxZ_SingleSided(QuadLight light, float3 lightPosition, float3 lightDirection)
{
    // [GFX TODO][ATOM-6170] We can compute a tighter bounds with single sided lights by bringing in one of bounds
    const float2 conservativeMinMax = ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
    return conservativeMinMax;
}
// Return the minz and maxz of this disk light in view space.
// The radius used is the attenuation radius extended by the light's bulb position offset.
float2 ComputeDiskLightMinMax(DiskLight light, float3 lightPosition)
{
    const float boundingRadius = rsqrt(light.m_invAttenuationRadiusSquared) + light.m_bulbPositionOffset;
    return ComputePointLightMinMaxZ(boundingRadius, lightPosition);
}
// Return the minz and maxz of this capsule light in view space.
//   lightPosition      : view-space position of the capsule's middle
//   lightFalloffRadius : attenuation radius of the light
float2 ComputeCapsuleLightMinMax(CapsuleLight light, float3 lightPosition, float lightFalloffRadius)
{
    // Half of the capsule's length projected onto the view z axis, extended by the falloff radius.
    const float halfAxisZ = abs(WorldToView_Vector(light.m_direction).z * light.m_length * 0.5f);
    const float reachZ = halfAxisZ + lightFalloffRadius;

    return float2(
        lightPosition.z - reachZ * RH_COORD_SYSTEM_REVERSE,
        lightPosition.z + reachZ * RH_COORD_SYSTEM_REVERSE);
}
// Culls all decals against the tile. The decals are distributed over the threads of the group:
// each thread starts at its own index and advances by the number of threads in the group.
// Decals that pass are appended to the shared list. tile_center_uv is not used.
void CullDecals(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents, float2 tile_center_uv)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    for (uint decalIndex = groupIndex; decalIndex < PassSrg::m_decalCount; decalIndex += threadsPerGroup)
    {
        const Decal decal = PassSrg::m_decals[decalIndex];
        const float3 decalViewPosition = WorldToView_Point(decal.m_position);

        // just wrapping a bounding sphere around a cube for now to get a minor perf boost. i.e. the sphere radius is sqrt(x*x + y*y + z*z)
        // ATOM-4224 - try AABB-AABB
        const float boundingRadiusSq = dot(decal.m_halfSize, decal.m_halfSize);

        if (!TestSphereVsAabb(decalViewPosition, boundingRadiusSq, aabb_center, aabb_extents))
        {
            continue;
        }

        uint inside = 0;
        float2 minmax = ComputePointLightMinMaxZ(sqrt(boundingRadiusSq), decalViewPosition);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(decalIndex, inside);
        }
    }
}
// Culls one spherical light against the tile and appends it to the shared list when it passes.
//   lightPosition  : world-space position of the light
//   invLightRadius : despite its name, callers pass 1 / radius^2 (m_invAttenuationRadiusSquared);
//                    it is used as such by both the AABB test and the rsqrt below
void CullPointLight(uint lightIndex, float3 lightPosition, float invLightRadius, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const float3 viewPosition = WorldToView_Point(lightPosition);
    if (TestSphereVsAabbInvSqrt(viewPosition, invLightRadius, aabb_center, aabb_extents))
    {
        // Implement and profile fine-grained light culling testing
        // ATOM-3732
        uint inside = 0;
        float2 minmax = ComputePointLightMinMaxZ(rsqrt(invLightRadius), viewPosition);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
// Culls all simple point lights against the tile, distributing the lights over the threads of the group.
void CullSimplePointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_simplePointLightCount; lightIndex += threadsPerGroup)
    {
        const SimplePointLight light = PassSrg::m_simplePointLights[lightIndex];
        CullPointLight(lightIndex, light.m_position, light.m_invAttenuationRadiusSquared, tileLightData, aabb_center, aabb_extents);
    }
}
// Culls all point lights against the tile, distributing the lights over the threads of the group.
void CullPointLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_pointLightCount; lightIndex += threadsPerGroup)
    {
        const PointLight light = PassSrg::m_pointLights[lightIndex];
        CullPointLight(lightIndex, light.m_position, light.m_invAttenuationRadiusSquared, tileLightData, aabb_center, aabb_extents);
    }
}
// Culls all simple spot lights against the tile, distributing the lights over the threads of the group.
// The tile AABB is approximated by a sphere (radius = length of aabb_extents) and tested against the light's cone.
void CullSimpleSpotLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_simpleSpotLightCount; lightIndex += threadsPerGroup)
    {
        const SimpleSpotLight light = PassSrg::m_simpleSpotLights[lightIndex];
        const float3 viewPosition = WorldToView_Point(light.m_position);
        const float3 viewDirection = WorldToView_Vector(light.m_direction);

        const float tileRadius = length(aabb_extents);
        const float coneLength = rsqrt(light.m_invAttenuationRadiusSquared);
        if (!TestSphereVsCone(aabb_center, tileRadius, viewPosition, viewDirection, light.m_cosOuterConeAngle, coneLength))
        {
            continue;
        }

        // Implement and profile fine-grained light culling testing
        // ATOM-3732
        uint inside = 0;
        float2 minmax = ComputeSimpleSpotLightMinMax(light, viewPosition);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
// Culls all disk lights against the tile, distributing the lights over the threads of the group.
// Lights flagged with UseConeAngle are tested as a cone; all others are tested as a sphere
// that is additionally rejected when the tile lies entirely behind the light's plane.
void CullDiskLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    const float aabbRadius = length(aabb_extents);

    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_diskLightCount; lightIndex += threadsPerGroup)
    {
        const DiskLight light = PassSrg::m_diskLights[lightIndex];

        // The tests run from a position pulled back along the light direction by the bulb position offset.
        const float3 lightPosition = WorldToView_Point(light.m_position - light.m_bulbPositionOffset * light.m_direction);
        const float3 lightDirection = WorldToView_Vector(light.m_direction);
        const float attenuationRadius = rsqrt(light.m_invAttenuationRadiusSquared);

        bool potentiallyIntersects;
        const bool useConeAngle = (light.m_flags & DiskLightFlag::UseConeAngle) > 0;
        if (useConeAngle)
        {
            const float coneLength = attenuationRadius + light.m_bulbPositionOffset;
            potentiallyIntersects = TestSphereVsCone(aabb_center, aabbRadius, lightPosition, lightDirection, light.m_cosOuterConeAngle, coneLength);
        }
        else
        {
            const float sphereRadius = attenuationRadius + light.m_diskRadius;
            potentiallyIntersects = TestSphereVsAabb(lightPosition, sphereRadius * sphereRadius, aabb_center, aabb_extents);
            if (potentiallyIntersects)
            {
                // Only one side is visible, check that we are above the hemisphere
                const float distanceToLightPlane = dot(lightDirection, aabb_center - lightPosition);
                potentiallyIntersects = distanceToLightPlane >= -aabbRadius;
            }
        }

        if (potentiallyIntersects)
        {
            // Implement and profile fine-grained light culling testing
            // ATOM-3732
            uint inside = 0;
            float2 minmax = ComputeDiskLightMinMax(light, lightPosition);
            if (IsObjectInsideTile(tileLightData, minmax, inside))
            {
                MarkLightAsVisibleInSharedMemory(lightIndex, inside);
            }
        }
    }
}
// Culls all capsule lights against the tile, distributing the lights over the threads of the group.
// Each capsule is bounded conservatively by a sphere centered on the capsule's middle.
void CullCapsuleLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_capsuleLightCount; lightIndex += threadsPerGroup)
    {
        const CapsuleLight light = PassSrg::m_capsuleLights[lightIndex];

        // Middle of the capsule: half of its length along its direction from the start point.
        const float3 middleWorld = light.m_startPoint + light.m_direction * light.m_length * 0.5f;
        const float3 middleView = WorldToView_Point(middleWorld);

        const float falloffRadius = rsqrt(light.m_invAttenuationRadiusSquared);
        const float boundingRadius = falloffRadius + light.m_length * 0.5f;

        if (!TestSphereVsAabb(middleView, boundingRadius * boundingRadius, aabb_center, aabb_extents))
        {
            continue;
        }

        // Implement and profile fine-grained light culling testing
        // ATOM-3732
        uint inside = 0;
        float2 minmax = ComputeCapsuleLightMinMax(light, middleView, falloffRadius);
        if (IsObjectInsideTile(tileLightData, minmax, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
// Culls all quad lights against the tile, distributing the lights over the threads of the group.
// Every light is first tested as a sphere with its attenuation radius. Single sided lights are
// additionally rejected when the tile lies entirely behind the light's plane.
void CullQuadLights(uint groupIndex, TileLightData tileLightData, float3 aabb_center, float3 aabb_extents)
{
    // Implement and profile fine-grained light culling testing
    // ATOM-3732
    const uint threadsPerGroup = TILE_DIM_X * TILE_DIM_Y;
    for (uint lightIndex = groupIndex; lightIndex < PassSrg::m_quadLightCount; lightIndex += threadsPerGroup)
    {
        const QuadLight light = PassSrg::m_quadLights[lightIndex];
        const float3 lightPosition = WorldToView_Point(light.m_position);

        if (!TestSphereVsAabbInvSqrt(lightPosition, light.m_invAttenuationRadiusSquared, aabb_center, aabb_extents))
        {
            continue;
        }

        float2 minmaxz;
        const bool emitsBothDirections = (light.m_flags & QuadLightFlag::EmitsBothDirections) != 0;
        if (emitsBothDirections)
        {
            minmaxz = ComputeQuadLightMinMaxZ_DoubleSided(light, lightPosition);
        }
        else
        {
            // Only one side is visible, check that we are above the hemisphere
            const float3 lightDirection = WorldToView_Vector(cross(light.m_leftDir, light.m_upDir));
            const float distanceToLightPlane = dot(lightDirection, aabb_center - lightPosition);
            const float aabbRadius = length(aabb_extents);

            // Written as a negated ">=" so a NaN distance is rejected as well.
            if (!(distanceToLightPlane >= -aabbRadius))
            {
                continue;
            }
            minmaxz = ComputeQuadLightMinMaxZ_SingleSided(light, lightPosition, lightDirection);
        }

        uint inside = 0;
        if (IsObjectInsideTile(tileLightData, minmaxz, inside))
        {
            MarkLightAsVisibleInSharedMemory(lightIndex, inside);
        }
    }
}
// Writes the "end of group" marker after the entries of the current phase and returns the tile's
// new running entry count (previous count + entries of this phase + 1 for the marker).
// The write position is clamped to the last slot of the tile's bin; the returned count is not clamped.
uint WriteEndOfGroup(uint lightCount, uint3 groupID)
{
    const uint endMarker = PackLightIndexWithBinMask(NVLC_END_OF_GROUP, NVLC_ALL_BIN_BITS);

    const uint slot = min(lightCount + shared_lightCount, NVLC_MAX_POSSIBLE_LIGHTS_PER_BIN - 1);
    const uint destination = GetLightListIndex(groupID, PassSrg::m_constantData.m_gridWidth, slot);
    PassSrg::m_lightList[destination] = endMarker;

    return lightCount + shared_lightCount + 1;
}
// Resets the shared entry counter. Only the first thread of the group performs the write;
// no barrier is issued here, so the caller is responsible for synchronization.
void ClearSharedLightCount(uint groupIndex)
{
    const bool isFirstThread = (groupIndex == 0);
    if (isFirstThread)
    {
        shared_lightCount = 0;
    }
}
// Resets the shared entry counter between two culling phases.
// Must be called by every thread of the group since it contains group barriers.
void ClearSharedLightCountWithDoubleBarrier(uint groupIndex)
{
    // Wait until every thread is done with the counter and list of the previous phase.
    GroupMemoryBarrierWithGroupSync();

    // Same reset as ClearSharedLightCount: a single thread clears the counter.
    if (groupIndex == 0)
    {
        shared_lightCount = 0;
    }

    // Make the reset visible to the whole group before the next phase starts appending.
    GroupMemoryBarrierWithGroupSync();
}
// Reads the first two channels of the tile's packed data and reinterprets their bits as floats.
float2 ReadDepthCloseFar(uint3 groupID)
{
    const uint2 packedDepths = PassSrg::m_tileLightData[groupID.xy].xy;
    return asfloat(packedDepths);
}
// Loads the packed per-tile data for the tile addressed by groupID.xy and unpacks it.
TileLightData ReadTileLightData(uint3 groupID)
{
    return Tile_UnpackData(PassSrg::m_tileLightData[groupID.xy]);
}
// Flushes the entries of the current phase from shared memory to PassSrg::m_lightList, appends the
// "end of group" marker and returns the tile's updated running entry count.
// Must be called by every thread of the group since it contains a group barrier.
uint WriteCullingDataToMainMemory(uint lightCount, uint groupIndex, uint3 groupID)
{
    // All threads must have finished appending to the shared list before it is copied out.
    GroupMemoryBarrierWithGroupSync();

    CopySharedLightsToMainMemory(lightCount, groupIndex, groupID);
    return WriteEndOfGroup(lightCount, groupID);
}
// Converts depth in view space to depth buffer depth
// It is used when depth is in view space and we want to use it as depth for projection matrix
// Computed as m[2][3] / depth - m[2][2], where m is the view's projection matrix.
float ViewSpaceToDepthBuffer(float depth)
{
    return ViewSrg::m_projectionMatrix[2][3] / depth - ViewSrg::m_projectionMatrix[2][2];
}
// Unprojects a screen UV plus a depth buffer depth into a view-space position.
float3 GetViewSpacePosition(float2 tileUv, float depth)
{
    // Map UV from [0, 1] to NDC [-1, 1]; y is flipped when building the clip-space position.
    const float2 ndc = tileUv * 2.0f - 1.0f;
    const float4 unprojected = mul(ViewSrg::m_projectionMatrixInverse, float4(ndc.x, -ndc.y, depth, 1.0f));

    // Perspective divide
    return unprojected.xyz / unprojected.w;
}
// This function builds an AABB in view space that encompasses the tile
// The AABB is fitted around four points: the tile's min-UV and max-UV corners, each unprojected
// at the tile's near and far depth.
//   tileLightData : per-tile data; zNear / zFar provide the depth range of the tile
//   tileId        : tile coordinate in the tile grid
//   aabbCenter    : [out] center of the box in view space
//   aabbExtents   : [out] half-size of the box about aabbCenter
//   tileCenterUv  : [out] UV of the tile's center
void BuildAabb(TileLightData tileLightData, uint2 tileId, out float3 aabbCenter, out float3 aabbExtents, out float2 tileCenterUv)
{
    const float2 tileUvMin = float2(tileId) * PassSrg::m_constantData.m_gridPixel;
    const float2 tileUvMax = tileUvMin + PassSrg::m_constantData.m_gridPixel;
    tileCenterUv = tileUvMin + PassSrg::m_constantData.m_gridHalfPixel;

    const float depthMin = ViewSpaceToDepthBuffer(-tileLightData.zNear);
    const float depthMax = ViewSpaceToDepthBuffer(-tileLightData.zFar);

    const float3 pt0 = GetViewSpacePosition(tileUvMin, depthMin);
    const float3 pt1 = GetViewSpacePosition(tileUvMin, depthMax);
    const float3 pt2 = GetViewSpacePosition(tileUvMax, depthMin);
    const float3 pt3 = GetViewSpacePosition(tileUvMax, depthMax);

    const float3 aabbMin = min(min(pt0, pt1), min(pt2, pt3));
    const float3 aabbMax = max(max(pt0, pt1), max(pt2, pt3));

    aabbCenter = (aabbMin + aabbMax) * 0.5f;

    // The extents are a half-size: the sphere/AABB tests subtract them from the distance to the center,
    // and length(aabb_extents) is used as the radius of the tile's bounding sphere. Returning the full
    // size here would double the box in every dimension and let far more lights through than necessary.
    aabbExtents = (aabbMax - aabbMin) * 0.5f;
}
// This shader is invoked with one thread-group per on-screen tile
// e.g. if the screen resolution is 1920x1080, with 16x16 tiles, there will be 120x68 tiles (and 120x68 thread groups)
// Each thread-group is dedicated to culling all decals and lights against that screen-tile.
// It might be worth splitting this compute shader into several shaders, one per light type.
//
// The work is done in phases, one per type, in this order:
//   decals, simple point lights, simple spot lights, point lights, disk lights, capsule lights, quad lights
// In each phase every thread reads one item, determines if it is visible, writes it to shared memory, then moves
// onto the next item until all items of that type are processed. After that the visible items are written back
// from shared memory to GPU memory, followed by an "End of Group" marker. Decals are sorted before being written.
//
// The resulting list for a tile looks like this:
//      Decal entry
//      Decal entry
//      ...
//      End of Group
//      Simple point light entry
//      Simple point light entry
//      ...
//      End of Group
//      (and so on for the remaining types)
// Each entry is one 32-bit UINT produced by PackLightIndexWithBinMask; per the original notes of this file it holds
// the 16 bit light index (index << 16) + 16-bit binning information (bitmask of the bins the light is present in).
//
// Note! This isn't consumed by the forward shader. This light list will be further processed by the LightCullingRemap shader, producing a LightListRemapped buffer
// that is more optimal for consumption by the forward shader.
[numthreads(TILE_DIM_X, TILE_DIM_Y, 1)]
void MainCS(
    uint3 dispatchThreadID : SV_DispatchThreadID,
    uint3 groupID : SV_GroupID,
    uint groupIndex : SV_GroupIndex)
{
    ClearSharedLightCount(groupIndex);

    TileLightData tileLightData = ReadTileLightData(groupID);

    float3 aabb_center;
    float3 aabb_extents;
    float2 tileCenterUv;
    BuildAabb(tileLightData, groupID.xy, aabb_center, aabb_extents, tileCenterUv);

    // The cleared counter must be visible to the whole group before the first phase appends to the list.
    GroupMemoryBarrierWithGroupSync();

    // Decals (the only phase that is sorted before being written out)
    CullDecals(groupIndex, tileLightData, aabb_center, aabb_extents, tileCenterUv);
    GroupMemoryBarrierWithGroupSync();
    SortDecals(groupIndex);
    uint lightCount = WriteCullingDataToMainMemory(0, groupIndex, groupID);

    // Simple point lights
    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullSimplePointLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID);

    // Simple spot lights
    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullSimpleSpotLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID);

    // Point lights
    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullPointLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID);

    // Disk lights
    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullDiskLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID);

    // Capsule lights
    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullCapsuleLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID);

    // Quad lights
    ClearSharedLightCountWithDoubleBarrier(groupIndex);
    CullQuadLights(groupIndex, tileLightData, aabb_center, aabb_extents);
    lightCount = WriteCullingDataToMainMemory(lightCount, groupIndex, groupID);

    // A single thread publishes the tile's total entry count (including the end-of-group markers).
    if (groupIndex == 0)
    {
        PassSrg::m_lightCount[groupID.xy] = lightCount;
    }
}
|