| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368 |
- // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
- // All rights reserved.
- // Code licensed under the BSD License.
- // http://www.anki3d.org/LICENSE
// Performs cluster binning. The binning dispatch's thread group count X is ceil(tileCount*sampleCount/numthreads) and its thread group count Y is the number of visible objects
- #pragma anki mutator OBJECT_TYPE 0 1 2 3 4 // Same as GpuSceneNonRenderableObjectType
- #pragma anki technique Setup comp mutators
- #pragma anki technique Binning comp
- #pragma anki technique PackVisibles comp
- #include <AnKi/Shaders/Include/ClusteredShadingTypes.h>
- #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
- // ===========================================================================
- // Setup =
- // ===========================================================================
- #if NOT_ZERO(ANKI_TECHNIQUE_Setup)
// One list of visible object indices per object type. Element 0 of every list is read as the number of visible objects of that type
StructuredBuffer<U32> g_visibleIndices[(U32)GpuSceneNonRenderableObjectType::kCount] : register(t0);

// Indirect dispatch arguments, 2*GpuSceneNonRenderableObjectType::kCount entries in total. Entries [0, kCount) are for the cluster binning
// dispatches and the remaining entries [kCount, 2*kCount) are for the packing dispatches
RWStructuredBuffer<DispatchIndirectArgs> g_indirectArgs : register(u0);
// Fast constants of the Setup technique
struct Constants
{
	U32 m_tileCount; // Number of tiles. Sizes the X dimension of the cluster binning dispatches

	// Not read by this technique
	U32 m_padding1;
	U32 m_padding2;
	U32 m_padding3;
};

ANKI_FAST_CONSTANTS(Constants, g_consts)
// Samples per tile. NOTE(review): the Binning technique declares its own kSampleCount, the two presumably have to stay in sync
constexpr U32 kSampleCount = 8;

// NOTE(review): presumably these have to match the numthreads of the Binning and PackVisibles techniques (both use 64 in this file)
constexpr U32 kClusterBinningThreadgroupSize = 64;
constexpr U32 kPackVisiblesThreadgroupSize = 64;

// Threadgroup size of the Setup technique itself
#	define THREADGROUP_SIZE 16
// Fills g_indirectArgs. Thread N writes entry N:
// - N in [0, kCount): args of the cluster binning dispatch for object type N
// - N in [kCount, 2*kCount): args of the packing dispatch for object type N-kCount
// Any other thread does nothing.
[numthreads(THREADGROUP_SIZE, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID)
{
	const U32 typeCount = (U32)GpuSceneNonRenderableObjectType::kCount;

	if(svDispatchThreadId >= typeCount * 2)
	{
		// Skip remaining threads
		return;
	}

	const Bool isBinningEntry = svDispatchThreadId < typeCount;
	const U32 typeIdx = (isBinningEntry) ? svDispatchThreadId : (svDispatchThreadId - typeCount);

	// The 1st element of the visible list is the count. Never process more objects than the clusterer supports
	const U32 visibleCount = min(kMaxVisibleClusteredObjects[typeIdx], g_visibleIndices[NonUniformResourceIndex(typeIdx)][0]);

	DispatchIndirectArgs dispatchArgs;
	if(isBinningEntry)
	{
		// Cluster binning: X covers all the (tile, sample) pairs and Y has one row per visible object
		const U32 tileSampleCount = g_consts.m_tileCount * kSampleCount;
		dispatchArgs.m_threadGroupCountX = (tileSampleCount + kClusterBinningThreadgroupSize - 1) / kClusterBinningThreadgroupSize;
		dispatchArgs.m_threadGroupCountY = visibleCount;
	}
	else
	{
		// Packing: one thread per visible object
		dispatchArgs.m_threadGroupCountX = (visibleCount + kPackVisiblesThreadgroupSize - 1) / kPackVisiblesThreadgroupSize;
		dispatchArgs.m_threadGroupCountY = 1;
	}
	dispatchArgs.m_threadGroupCountZ = 1;

	g_indirectArgs[svDispatchThreadId] = dispatchArgs;
}
- #endif
- // ===========================================================================
- // Binning =
- // ===========================================================================
- #if NOT_ZERO(ANKI_TECHNIQUE_Binning)
// GpuSceneType: the GPU scene structure to bin, chosen by the OBJECT_TYPE mutator
#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
typedef GpuSceneLight GpuSceneType;
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
typedef GpuSceneDecal GpuSceneType;
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
typedef GpuSceneFogDensityVolume GpuSceneType;
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
typedef GpuSceneReflectionProbe GpuSceneType;
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
typedef GpuSceneGlobalIlluminationProbe GpuSceneType;
#	else
#		error See file
#	endif
// Fast constants of the Binning technique
struct ClusterBinningConstants
{
	Vec3 m_cameraOrigin; // Origin of every sample ray
	F32 m_zSplitCountOverFrustumLength; // Scales a distance from the near plane into a Z split index

	Vec2 m_renderingSize; // Divides a pixel coordinate to get its UV
	U32 m_tileCountX; // Tiles per row, used to turn a tile index into a 2D tile coordinate
	U32 m_tileCount; // Total number of tiles. Also the offset of the first Z split cluster inside the clusters buffer

	Vec4 m_nearPlaneWorld; // xyz and w are handed to testPlanePoint() to measure the distance of a hit point from the near plane

	I32 m_zSplitCountMinusOne; // Upper bound of the Z split indices
	I32 m_padding0; // Not read by this technique
	I32 m_padding1; // Not read by this technique
	I32 m_padding2; // Not read by this technique

	Mat4 m_invertedViewProjMat; // Unprojects a NDC position on the far plane
};

ANKI_FAST_CONSTANTS(ClusterBinningConstants, g_consts)
// Element 0 is the number of visible objects. The elements that follow are indices into g_objects
StructuredBuffer<U32> g_visibleObjectIds : register(t0);

// The GPU scene objects of the type selected by the OBJECT_TYPE mutator
StructuredBuffer<GpuSceneType> g_objects : register(t1);

// The output. Indexed by tile index for the tile clusters and by m_tileCount+zSplit for the Z split clusters
RWStructuredBuffer<Cluster> g_clusters : register(u0);
#	define THREADGROUP_SIZE 64

// Number of rays shot per tile
constexpr U32 kSampleCount = 8u;

// Per sample offset in pixels, relative to the tile's origin. The offsets are ALMOST like the DX sample locations
// (https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_standard_multisample_quality_levels).
// LOCATION shifts its input by 8, divides by 16 and scales the result by the tile size
#	define LOCATION(x, y) UVec2(Vec2(IVec2(x, y) + 8) / 16.0 * F32(kClusteredShadingTileSize))
constexpr UVec2 kSampleLocations[kSampleCount] = {
	LOCATION(1, -3),
	LOCATION(-1, 3),
	LOCATION(5, 1),
	LOCATION(-6, -6),
	LOCATION(-6, 6),
	LOCATION(-7, -1),
	LOCATION(6, 7),
	LOCATION(7, -7)};
#	undef LOCATION
// Bins one visible object against one sample ray of one tile.
//
// svDispatchThreadId.x selects the (tile, sample) pair: tileIdx = x / kSampleCount and sampleIdx = x % kSampleCount.
// svDispatchThreadId.y selects the visible object (position inside g_visibleObjectIds, not counting the leading count).
//
// The thread shoots a ray from the camera origin through the sample's pixel and tests it against the object's bounds. On a hit it ORs the
// object's bit into the tile's cluster (g_clusters[tileIdx]) and into the cluster of every Z split that the hit interval [t0, t1] touches
// (g_clusters[m_tileCount + zSplit]).
[numthreads(THREADGROUP_SIZE, 1, 1)] void main(UVec2 svDispatchThreadId : SV_DISPATCHTHREADID)
{
	// The dispatch may contain more threads in X than there are (tile, sample) pairs. Clamp the surplus threads to the LAST valid pair, which
	// makes them redo work that is idempotent. Clamping to m_tileCount*kSampleCount (one past the end) would produce tileIdx == m_tileCount
	// and that element of g_clusters is the first Z split, not a tile
	const U32 tileSampleCount = g_consts.m_tileCount * kSampleCount;
	const U32 dispatchThreadIdX = min(svDispatchThreadId.x, max(tileSampleCount, 1u) - 1u);
	const U32 tileIdx = dispatchThreadIdX / kSampleCount;
	const U32 sampleIdx = dispatchThreadIdX % kSampleCount;
	const U32 visibleObjectIdx = svDispatchThreadId.y;
	ANKI_ASSERT(visibleObjectIdx < kMaxVisibleClusteredObjects[OBJECT_TYPE]);

	const UVec2 tileXY = UVec2(tileIdx % g_consts.m_tileCountX, tileIdx / g_consts.m_tileCountX);

	// This is a pixel in one of the main framebuffers of the renderer, eg the gbuffer's framebuffers
	const UVec2 pixel = tileXY * kClusteredShadingTileSize + kSampleLocations[sampleIdx];

	const Vec2 uv = Vec2(pixel) / g_consts.m_renderingSize;
	const Vec2 ndc = uvToNdc(uv);

	// Unproject the sample to world space
	const Vec4 farWorldPos4 = mul(g_consts.m_invertedViewProjMat, Vec4(ndc, 1.0, 1.0));
	const Vec3 farWorldPos = farWorldPos4.xyz / farWorldPos4.w;

	// Create the ray that will test the clusterer objects
	const Vec3 rayOrigin = g_consts.m_cameraOrigin;
	const Vec3 rayDir = normalize(farWorldPos - rayOrigin);

	// Do collision. t0 and t1 are the ray parameters that bound the part of the ray that is inside the object
	F32 t0, t1;
	Bool collides;
	const GpuSceneType obj = g_objects[g_visibleObjectIds[visibleObjectIdx + 1]]; // +1 to skip the count

#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
	if(obj.m_isPointLight)
	{
		collides = testRaySphere(rayOrigin, rayDir, obj.m_position, obj.m_radius, t0, t1);
	}
	else
	{
		// Spot light
		t0 = 10000.0;
		t1 = -10000.0;

		// Iterate all triangles. Vertex 0 is the light's position and vertices 1-4 are its edge points
		const U32 indices[6u * 3u] = {0u, 1u, 2u, 0u, 2u, 3u, 0u, 3u, 4u, 0u, 1u, 4u, 1u, 2u, 3u, 3u, 4u, 1u};
		const Vec3 edgePoints[5u] = {obj.m_position, obj.m_edgePoints[0].xyz, obj.m_edgePoints[1].xyz, obj.m_edgePoints[2].xyz,
									 obj.m_edgePoints[3].xyz};

		U32 hits = 0u;
		U32 idx = 0u;
		do
		{
			const Vec3 v0 = edgePoints[indices[idx + 0u]];
			const Vec3 v1 = edgePoints[indices[idx + 1u]];
			const Vec3 v2 = edgePoints[indices[idx + 2u]];

			F32 t, u, v;
			const Bool localCollides = testRayTriangle(rayOrigin, rayDir, v0, v1, v2, false, t, u, v);

			if(localCollides)
			{
				t0 = min(t0, t);
				t1 = max(t1, t);
				++hits;
			}

			idx += 3u;
		} while(hits < 2u && idx < 6u * 3u); // Stop after 2 hits, the interval is complete at that point

		if(hits == 1u)
		{
			// Only one hit was found, start the interval from the ray's origin
			t0 = 0.0;
		}

		collides = (hits != 0u);
	}
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
	collides = testRaySphere(rayOrigin, rayDir, obj.m_sphereCenter, obj.m_sphereRadius, t0, t1);
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
	if(obj.m_isBox != 0u)
	{
		collides = testRayAabb(rayOrigin, rayDir, obj.m_aabbMinOrSphereCenter, obj.m_aabbMaxOrSphereRadius, t0, t1);
	}
	else
	{
		collides = testRaySphere(rayOrigin, rayDir, obj.m_aabbMinOrSphereCenter, obj.m_aabbMaxOrSphereRadius.x, t0, t1);
	}
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE \
		|| OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
	collides = testRayAabb(rayOrigin, rayDir, obj.m_aabbMin, obj.m_aabbMax, t0, t1);
#	else
#		error See file
#	endif

	// Update the masks
	if(collides)
	{
		// One bit per visible object. Some masks are arrays of 32bit words, maskArrayIdx selects the word
		const U32 mask = 1u << (visibleObjectIdx % 32);
		const U32 maskArrayIdx = visibleObjectIdx / 32;
		ANKI_MAYBE_UNUSED(maskArrayIdx);

		// Set the tile
#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
		if(obj.m_isPointLight)
		{
			InterlockedOr(g_clusters[tileIdx].m_pointLightsMask[maskArrayIdx], mask);
		}
		else
		{
			InterlockedOr(g_clusters[tileIdx].m_spotLightsMask[maskArrayIdx], mask);
		}
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
		InterlockedOr(g_clusters[tileIdx].m_decalsMask[maskArrayIdx], mask);
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
		InterlockedOr(g_clusters[tileIdx].m_fogDensityVolumesMask, mask);
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
		InterlockedOr(g_clusters[tileIdx].m_reflectionProbesMask, mask);
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
		InterlockedOr(g_clusters[tileIdx].m_giProbesMask, mask);
#	else
#		error See file
#	endif

		// Compute and set the Z splits
		const Vec3 hitpointA = rayDir * t0 + rayOrigin;
		const Vec3 hitpointB = rayDir * t1 + rayOrigin;
		const F32 distFromNearPlaneA = testPlanePoint(g_consts.m_nearPlaneWorld.xyz, g_consts.m_nearPlaneWorld.w, hitpointA);
		const F32 distFromNearPlaneB = testPlanePoint(g_consts.m_nearPlaneWorld.xyz, g_consts.m_nearPlaneWorld.w, hitpointB);

		F32 minDistFromNearPlane;
		F32 maxDistFromNearPlane;
		if(distFromNearPlaneA < distFromNearPlaneB)
		{
			minDistFromNearPlane = distFromNearPlaneA;
			maxDistFromNearPlane = distFromNearPlaneB;
		}
		else
		{
			minDistFromNearPlane = distFromNearPlaneB;
			maxDistFromNearPlane = distFromNearPlaneA;
		}

		// The start is only clamped from below. If it ends up past endZSplit the loop simply doesn't execute
		const I32 startZSplit = max(I32(minDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0);
		const I32 endZSplit = clamp(I32(maxDistFromNearPlane * g_consts.m_zSplitCountOverFrustumLength), 0, g_consts.m_zSplitCountMinusOne);
		for(I32 i = startZSplit; i <= endZSplit; ++i)
		{
			// The Z split clusters follow the tile clusters inside g_clusters
#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
			if(obj.m_isPointLight)
			{
				InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_pointLightsMask[maskArrayIdx], mask);
			}
			else
			{
				InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_spotLightsMask[maskArrayIdx], mask);
			}
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_decalsMask[maskArrayIdx], mask);
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_fogDensityVolumesMask, mask);
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_reflectionProbesMask, mask);
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
			InterlockedOr(g_clusters[g_consts.m_tileCount + i].m_giProbesMask, mask);
#	else
#		error See file
#	endif
		}
	}
}
- #endif
- // ===========================================================================
- // PackVisibles =
- // ===========================================================================
- #if NOT_ZERO(ANKI_TECHNIQUE_PackVisibles)
// Chosen by the OBJECT_TYPE mutator:
// - GpuSceneType: the structure that is read from the input buffer
// - ClusteredType: the structure that is written to the output buffer
#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
typedef GpuSceneLight GpuSceneType;
typedef LightUnion ClusteredType;
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
typedef GpuSceneDecal GpuSceneType;
typedef Decal ClusteredType;
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
typedef GpuSceneFogDensityVolume GpuSceneType;
typedef FogDensityVolume ClusteredType;
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
typedef GpuSceneReflectionProbe GpuSceneType;
typedef ReflectionProbe ClusteredType;
#	elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_GLOBAL_ILLUMINATION_PROBE
typedef GpuSceneGlobalIlluminationProbe GpuSceneType;
typedef GlobalIlluminationProbe ClusteredType;
#	else
#		error See file
#	endif
// Source objects, addressed through the indices stored in g_visibles
StructuredBuffer<GpuSceneType> g_inBuffer : register(t0);

// Element 0 is the number of visible objects. The elements that follow are indices into g_inBuffer
StructuredBuffer<U32> g_visibles : register(t1);

// Destination. Element N receives the Nth visible object
RWStructuredBuffer<ClusteredType> g_outBuffer : register(u0);
#	define THREAD_GROUP_SIZE 64

// Copies the visible objects into a tightly packed array, one thread per visible object. Thread N reads the object that the Nth visible index
// points to and stores it in g_outBuffer[N]. Lights are converted to LightUnion field by field, all other types are assigned directly.
[numthreads(THREAD_GROUP_SIZE, 1, 1)] void main(UVec3 svDispatchThreadId : SV_DISPATCHTHREADID)
{
	const U32 dstIdx = svDispatchThreadId.x;

	// Never pack more objects than the clusterer supports
	const U32 packedCount = min(g_visibles[0], kMaxVisibleClusteredObjects[OBJECT_TYPE]);

	if(dstIdx < packedCount)
	{
#	if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
		const GpuSceneLight src = g_inBuffer[g_visibles[dstIdx + 1]]; // +1 to skip the count
		const Bool pointLight = src.m_isPointLight;

		LightUnion dst = (LightUnion)0;

		dst.m_position = src.m_position;
		dst.m_radius = src.m_radius;

		dst.m_diffuseColor = src.m_diffuseColor;
		dst.m_lightType = (pointLight) ? 0 : 1;

		dst.m_shadow = src.m_shadow;
		dst.m_innerCos = src.m_innerCos;
		dst.m_outerCos = src.m_outerCos;

		dst.m_direction = src.m_direction;
		dst.m_shadowAtlasTileScale = src.m_spotLightMatrixOrPointLightUvViewports[0].z; // Scale should be the same for all

		for(U32 elem = 0; elem < 6; ++elem)
		{
			dst.m_spotLightMatrixOrPointLightUvViewports[elem] = src.m_spotLightMatrixOrPointLightUvViewports[elem];
		}

		g_outBuffer[dstIdx] = dst;
#	else
		g_outBuffer[dstIdx] = g_inBuffer[g_visibles[dstIdx + 1]]; // +1 to skip the count
#	endif
	}
}
- #endif
|