// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#pragma anki technique Visibility comp
#pragma anki technique ZeroRemainingInstances comp

// NOTE(review): the include targets and the element types of the structured buffers below are not visible in this view of the
// file. They are reproduced exactly as they appear here, confirm against the real file before merging.
#include
#include
#include
#include
#include

// ===========================================================================
// Visibility                                                                =
// ===========================================================================
#if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_Visibility)

// Inputs: buffers that point to the GPU scene
StructuredBuffer g_renderableBoundingVolumes : register(t0);
StructuredBuffer g_renderables : register(t1);
StructuredBuffer g_meshLods : register(t2);
StructuredBuffer g_transforms : register(t3);

// Outputs
RWStructuredBuffer g_visibleInstances : register(u0);
RWStructuredBuffer g_visibleRenderables : register(u1); // Element 0 is the count, element N+1 describes instance N

// 2 counters per dispatch. [0] counts the instances that asked for a slot, [1] counts the threadgroups that finished
globallycoherent RWStructuredBuffer g_counterBuffer : register(u2);

// Contains 2 elements. [0] is the args of the ZeroRemainingInstances, [1] is the args of the SBT build dispatches
RWStructuredBuffer g_nextDispatchIndirectArgs : register(u3);

ANKI_FAST_CONSTANTS(GpuVisibilityAccelerationStructuresConstants, g_consts)

#	define NUMTHREADS 64

// One thread per renderable bounding volume. A volume that intersects the test sphere of g_consts, and whose selected mesh LOD
// has a BLAS, gets an instance appended to g_visibleInstances. The threadgroup that finishes last publishes the final count,
// zeroes the counters and writes the indirect args of the dispatches that follow.
[numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
{
	const U32 instanceCapacity = getStructuredBufferElementCount(g_visibleInstances);
	const U32 volumeCount = getStructuredBufferElementCount(g_renderableBoundingVolumes);
	const U32 volumeIdx = svDispatchThreadId;

	// Threads that are past the last volume are flagged instead of returning, they still take part in the barrier below
	Bool passedTests = (volumeIdx < volumeCount);

	// Sphere vs sphere test. The center is the middle of the AABB
	GpuSceneRenderableBoundingVolume volume;
	Vec3 center = 0.0;
	if(passedTests)
	{
		volume = SBUFF(g_renderableBoundingVolumes, volumeIdx);
		center = (volume.m_aabbMin + volume.m_aabbMax) * 0.5f;
		passedTests = testSphereSphereCollision(center, volume.m_sphereRadius, g_consts.m_pointOfTest, g_consts.m_testRadius);
	}

	if(passedTests)
	{
		// LOD selection. Everything outside the camera frustum gets the lowest LOD, the rest is refined by distance so that
		// it matches the camera LODs
		U32 lod = 2u;
		if(frustumTest(g_consts.m_clipPlanes, center, volume.m_sphereRadius))
		{
			const F32 surfaceDist = length(center - g_consts.m_pointOfTest) - volume.m_sphereRadius;
			if(surfaceDist < g_consts.m_maxLodDistances[0])
			{
				lod = 0u;
			}
			else if(surfaceDist < g_consts.m_maxLodDistances[1])
			{
				lod = 1u;
			}
		}

		const U32 renderableIdx = volume.m_renderableIndex;
		const GpuSceneRenderable renderable = SBUFF(g_renderables, renderableIdx);
		const GpuSceneMeshLod meshLod = SBUFF(g_meshLods, renderable.m_meshLodsIndex + lod);

		// A zero address means that this LOD has no BLAS, nothing to write in that case
		const Bool hasBlas = (meshLod.m_blasAddress.x != 0 || meshLod.m_blasAddress.y != 0);
		if(hasBlas)
		{
			// Uniform scale + translation taken from the mesh LOD
			Mat3x4 quantizationTrf;
			quantizationTrf.m_row0 = Vec4(meshLod.m_positionScale, 0.0f, 0.0f, meshLod.m_positionTranslation.x);
			quantizationTrf.m_row1 = Vec4(0.0f, meshLod.m_positionScale, 0.0f, meshLod.m_positionTranslation.y);
			quantizationTrf.m_row2 = Vec4(0.0f, 0.0f, meshLod.m_positionScale, meshLod.m_positionTranslation.z);

			const Mat3x4 worldTrf = SBUFF(g_transforms, renderable.m_worldTransformsIndex);
			const Mat3x4 instanceTrf = combineTransformations(worldTrf, quantizationTrf);

			// Grab a slot. The counter keeps growing past the capacity, the instances that don't fit are dropped
			U32 slot;
			InterlockedAdd(SBUFF(g_counterBuffer, 0), 1, slot);

			if(slot < instanceCapacity)
			{
				AccelerationStructureInstance instance;
				instance.m_transform = instanceTrf;
				instance.m_mask = meshLod.m_tlasInstanceMask;
				instance.m_instanceCustomIndex = renderable.m_diffuseColor & 0x00FFFFFFu;
				instance.m_flags = kAccellerationStructureFlagTriangleFrontCounterlockwise;
				instance.m_instanceShaderBindingTableRecordOffset = slot & 0x00FFFFFFu;
				instance.m_accelerationStructureAddress = meshLod.m_blasAddress;
				SBUFF(g_visibleInstances, slot) = instance;

				// +1 because element 0 holds the count
				SBUFF(g_visibleRenderables, slot + 1).m_lod_2bit_renderableIndex_30bit = (lod << 30u) | renderableIdx;
			}
		}
	}

	// Store the counters to the actual buffers.
	// Sync first so that the atomic ops of the whole group have finished before the code that follows reads them
	AllMemoryBarrierWithGroupSync();

	if(svGroupIndex == 0)
	{
		// Only the 1st thread of every group reports in
		U32 groupsDoneBefore;
		InterlockedAdd(SBUFF(g_counterBuffer, 1), 1, groupsDoneBefore);

		const U32 groupCount = (volumeCount + NUMTHREADS - 1) / NUMTHREADS;
		if(groupsDoneBefore + 1 == groupCount)
		{
			// This is the last threadgroup executing. Clamp the count because the counter may have gone over the capacity
			const U32 visibleCount = min(SBUFF(g_counterBuffer, 0), instanceCapacity);
			SBUFF(g_visibleRenderables, 0).m_lod_2bit_renderableIndex_30bit = visibleCount;

			// Zero the counters
			SBUFF(g_counterBuffer, 0) = 0;
			SBUFF(g_counterBuffer, 1) = 0;

			// Indirect args of the ZeroRemainingInstances: one thread per unused instance
			const U32 unusedCount = instanceCapacity - visibleCount;
			DispatchIndirectArgs zeroArgs;
			zeroArgs.m_threadGroupCountX = (unusedCount + NUMTHREADS - 1) / NUMTHREADS;
			zeroArgs.m_threadGroupCountY = 1;
			zeroArgs.m_threadGroupCountZ = 1;
			SBUFF(g_nextDispatchIndirectArgs, 0) = zeroArgs;

			// Indirect args for the various SBT build dispatches: one thread per visible instance
			DispatchIndirectArgs sbtArgs;
			sbtArgs.m_threadGroupCountX = (visibleCount + NUMTHREADS - 1) / NUMTHREADS;
			sbtArgs.m_threadGroupCountY = 1;
			sbtArgs.m_threadGroupCountZ = 1;
			SBUFF(g_nextDispatchIndirectArgs, 1) = sbtArgs;
		}
	}
}

#endif // ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_Visibility)

// ===========================================================================
// ZeroRemainingInstances                                                    =
// ===========================================================================
#if ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_ZeroRemainingInstances)

StructuredBuffer g_visibleRenderableIndices : register(t0); // Element 0 is the count of visible instances
RWStructuredBuffer g_instances : register(u0);

#	define NUMTHREADS 64

// Zero-fills the instances of g_instances that come after the visible ones. One thread per instance to clear.
[numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID)
{
	const U32 maxInstances = getStructuredBufferElementCount(g_instances);
	const U32 visibleInstances = g_visibleRenderableIndices[0];
	ANKI_ASSERT(maxInstances >= visibleInstances);

	// The thread count is rounded up to a multiple of NUMTHREADS, the extra threads do nothing
	const U32 instancesToZero = maxInstances - visibleInstances;
	if(svDispatchThreadId < instancesToZero)
	{
		const U32 dstIdx = visibleInstances + svDispatchThreadId;
		SBUFF(g_instances, dstIdx) = (AccelerationStructureInstance)0;
	}
}

#endif // ANKI_COMPUTE_SHADER && NOT_ZERO(ANKI_TECHNIQUE_ZeroRemainingInstances)