| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- // Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
- // All rights reserved.
- // Code licensed under the BSD License.
- // http://www.anki3d.org/LICENSE
- #pragma anki technique Visibility comp
- #pragma anki technique ZeroRemainingInstances comp
- #include <AnKi/Shaders/Common.hlsl>
- #include <AnKi/Shaders/Include/GpuSceneTypes.h>
- #include <AnKi/Shaders/Include/GpuVisibilityTypes.h>
- #include <AnKi/Shaders/Include/MiscRendererTypes.h>
- #include <AnKi/Shaders/VisibilityAndCollisionFunctions.hlsl>
- // ===========================================================================
- // Visibility =
- // ===========================================================================
- #if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_Visibility)
// GPU scene inputs (read-only)
// One entry per candidate. Its element count is the number of volumes to test, one per dispatch thread
StructuredBuffer<GpuSceneRenderableBoundingVolume> g_renderableBoundingVolumes : register(t0);
// Looked up with GpuSceneRenderableBoundingVolume::m_renderableIndex
StructuredBuffer<GpuSceneRenderable> g_renderables : register(t1);
// Looked up with GpuSceneRenderable::m_meshLodsIndex + the selected LOD
StructuredBuffer<GpuSceneMeshLod> g_meshLods : register(t2);
// Looked up with GpuSceneRenderable::m_worldTransformsIndex
StructuredBuffer<Mat3x4> g_transforms : register(t3);

// Outputs
// The instances that passed the tests. Its element count is the max number of instances that can be written
RWStructuredBuffer<AccelerationStructureInstance> g_visibleInstances : register(u0);
// Element 0 is the count. Element i + 1 matches g_visibleInstances[i] and packs the LOD with the renderable index
RWStructuredBuffer<LodAndRenderableIndex> g_visibleRenderables : register(u1);
// 2 counters per dispatch: [0] is bumped for every instance, [1] for every threadgroup that finished. The last
// threadgroup resets both to zero
globallycoherent RWStructuredBuffer<U32> g_counterBuffer : register(u2);
// 2 elements: [0] holds the args of the ZeroRemainingInstances dispatch, [1] the args of the SBT build dispatches
RWStructuredBuffer<DispatchIndirectArgs> g_nextDispatchIndirectArgs : register(u3);

// The entry point reads m_pointOfTest, m_testRadius, m_clipPlanes and m_maxLodDistances from it
ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)
#	define NUMTHREADS 64

// One thread per renderable bounding volume. A volume that overlaps the test sphere of g_consts and whose selected
// mesh LOD has a BLAS gets an AccelerationStructureInstance appended to g_visibleInstances. The threadgroup that
// finishes last stores the final count, resets the counters and writes the indirect args of the next dispatches.
[numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
{
	const U32 instanceCapacity = getStructuredBufferElementCount(g_visibleInstances);
	const U32 volumeCount = getStructuredBufferElementCount(g_renderableBoundingVolumes);
	const U32 volumeIdx = svDispatchThreadId;

	// Threads past the end of the array are only flagged here, they still run through to the group sync below
	Bool passedTests = (volumeIdx < volumeCount);

	// Test the volume's sphere against the sphere of interest
	GpuSceneRenderableBoundingVolume volume;
	Vec3 center = 0.0;
	if(passedTests)
	{
		volume = SBUFF(g_renderableBoundingVolumes, volumeIdx);
		center = (volume.m_aabbMin + volume.m_aabbMax) * 0.5f;
		passedTests = testSphereSphereCollision(center, volume.m_sphereRadius, g_consts.m_pointOfTest, g_consts.m_testRadius);
	}

	if(passedTests)
	{
		// Start from the lowest detail LOD. Only volumes inside the camera frustum get something better, so that they
		// match the LODs the camera uses
		U32 lod = 2u;
		if(frustumTest(g_consts.m_clipPlanes, center, volume.m_sphereRadius))
		{
			const F32 distToSurface = length(center - g_consts.m_pointOfTest) - volume.m_sphereRadius;
			if(distToSurface < g_consts.m_maxLodDistances[0])
			{
				lod = 0u;
			}
			else if(distToSurface < g_consts.m_maxLodDistances[1])
			{
				lod = 1u;
			}
		}

		const U32 renderableIdx = volume.m_renderableIndex;
		const GpuSceneRenderable renderable = SBUFF(g_renderables, renderableIdx);

		const U32 meshLodIdx = renderable.m_meshLodsIndex + lod;
		const GpuSceneMeshLod meshLod = SBUFF(g_meshLods, meshLodIdx);

		// A zero address means that there is no BLAS for that LOD and nothing to add
		const Bool hasBlas = (meshLod.m_blasAddress.x != 0 || meshLod.m_blasAddress.y != 0);
		if(hasBlas)
		{
			const Mat3x4 worldTrf = SBUFF(g_transforms, renderable.m_worldTransformsIndex);

			// Uniform scale and translation taken from the mesh LOD
			Mat3x4 quantizationTrf;
			quantizationTrf.m_row0 = Vec4(meshLod.m_positionScale, 0.0f, 0.0f, meshLod.m_positionTranslation.x);
			quantizationTrf.m_row1 = Vec4(0.0f, meshLod.m_positionScale, 0.0f, meshLod.m_positionTranslation.y);
			quantizationTrf.m_row2 = Vec4(0.0f, 0.0f, meshLod.m_positionScale, meshLod.m_positionTranslation.z);

			const Mat3x4 finalTrf = combineTransformations(worldTrf, quantizationTrf);

			// Reserve a slot. The counter keeps increasing even when the output is full, it's clamped when published
			U32 slot;
			InterlockedAdd(SBUFF(g_counterBuffer, 0), 1, slot);

			if(slot < instanceCapacity)
			{
				AccelerationStructureInstance inst;
				inst.m_transform = finalTrf;
				inst.m_mask = meshLod.m_tlasInstanceMask;
				inst.m_instanceCustomIndex = renderable.m_diffuseColor & 0x00FFFFFFu;
				inst.m_flags = kAccellerationStructureFlagTriangleFrontCounterlockwise;
				inst.m_instanceShaderBindingTableRecordOffset = slot & 0x00FFFFFFu;
				inst.m_accelerationStructureAddress = meshLod.m_blasAddress;

				SBUFF(g_visibleInstances, slot) = inst;

				// +1 because the 1st element is the count. LOD goes to the 2 high bits
				SBUFF(g_visibleRenderables, slot + 1).m_lod_2bit_renderableIndex_30bit = (lod << 30u) | renderableIdx;
			}
		}
	}

	// Publish the counters
	{
		// Wait for the atomics of this threadgroup to complete before its 1st thread reports the group as done
		AllMemoryBarrierWithGroupSync();

		if(svGroupIndex == 0)
		{
			U32 groupsDoneBefore;
			InterlockedAdd(SBUFF(g_counterBuffer, 1), 1, groupsDoneBefore);

			const U32 groupCount = (volumeCount + NUMTHREADS - 1) / NUMTHREADS;
			const Bool isLastGroup = (groupsDoneBefore + 1 == groupCount);

			if(isLastGroup)
			{
				// The counter may have gone past the capacity, clamp it
				const U32 visibleCount = min(SBUFF(g_counterBuffer, 0), instanceCapacity);
				SBUFF(g_visibleRenderables, 0).m_lod_2bit_renderableIndex_30bit = visibleCount;

				// Leave the counters clean
				SBUFF(g_counterBuffer, 0) = 0;
				SBUFF(g_counterBuffer, 1) = 0;

				// ZeroRemainingInstances: one thread per unused instance
				const U32 unusedCount = instanceCapacity - visibleCount;

				DispatchIndirectArgs zeroArgs;
				zeroArgs.m_threadGroupCountX = (unusedCount + NUMTHREADS - 1) / NUMTHREADS;
				zeroArgs.m_threadGroupCountY = 1;
				zeroArgs.m_threadGroupCountZ = 1;
				SBUFF(g_nextDispatchIndirectArgs, 0) = zeroArgs;

				// SBT build dispatches: one thread per visible instance
				DispatchIndirectArgs sbtArgs = zeroArgs;
				sbtArgs.m_threadGroupCountX = (visibleCount + NUMTHREADS - 1) / NUMTHREADS;
				SBUFF(g_nextDispatchIndirectArgs, 1) = sbtArgs;
			}
		}
	}
}
- #endif // ANKI_COMPUTE_SHADER && ANKI_TECHNIQUE_Visibility
- // ===========================================================================
- // ZeroRemainingInstances =
- // ===========================================================================
- #if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_ZeroRemainingInstances)
// Only the 1st element is read and it's the number of instances in use.
// NOTE(review): presumably the same buffer the Visibility technique writes as g_visibleRenderables, confirm at the
// binding site
StructuredBuffer<U32> g_visibleRenderableIndices : register(t0);
// The instances buffer. Everything after the instances in use will be zeroed
RWStructuredBuffer<AccelerationStructureInstance> g_instances : register(u0);
#	define NUMTHREADS 64

// Zeroes the unused tail of g_instances, the elements in [visibleInstances, element count of g_instances). One
// thread per element. Threads that land past the tail do nothing.
[numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID)
{
	// The 1st element is the count. Read it through SBUFF like the rest of the buffer accesses in this file
	const U32 visibleInstances = SBUFF(g_visibleRenderableIndices, 0);
	const U32 maxInstances = getStructuredBufferElementCount(g_instances);
	ANKI_ASSERT(maxInstances >= visibleInstances);

	// Saturate the subtraction. If the invariant above is ever broken and the assertion doesn't stop the execution
	// a wrapped U32 would make every thread write past the end of g_instances
	const U32 remainingInstances = maxInstances - min(visibleInstances, maxInstances);

	if(svDispatchThreadId < remainingInstances)
	{
		SBUFF(g_instances, visibleInstances + svDispatchThreadId) = (AccelerationStructureInstance)0;
	}
}
- #endif
|