// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

// Compute shader that tests the bounding spheres of non-renderable GPU scene objects against the frustum (and
// optionally the HZB) and writes out the indices of the ones that pass.

// HZB_TEST: 1 enables the extra HZB occlusion test.
#pragma anki mutator HZB_TEST 0 1
#pragma anki mutator OBJECT_TYPE 0 1 2 3 4 // Same as GpuSceneNonRenderableObjectType
// CPU_FEEDBACK: 1 additionally reports visible objects that request it to g_cpuFeedbackBuffer.
#pragma anki mutator CPU_FEEDBACK 0 1

// CPU feedback is not compiled for object types 1 and 2.
#pragma anki skip_mutation CPU_FEEDBACK 1 OBJECT_TYPE 1
#pragma anki skip_mutation CPU_FEEDBACK 1 OBJECT_TYPE 2

#pragma anki technique comp

// NOTE(review): the include targets below, and the template arguments of the buffer/texture declarations further
// down, are missing in this copy of the file. Restore them from the original source before compiling.
#include
#include
#include
#include

// Select the GPU scene struct that matches the OBJECT_TYPE mutator. Everything below refers to it as ObjectType.
#if OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_LIGHT
typedef GpuSceneLight ObjectType;
#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_DECAL
typedef GpuSceneDecal ObjectType;
#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_FOG_DENSITY_VOLUME
typedef GpuSceneFogDensityVolume ObjectType;
#elif OBJECT_TYPE == ANKI_GPU_SCENE_NON_RENDERABLE_OBJECT_TYPE_REFLECTION_PROBE
typedef GpuSceneReflectionProbe ObjectType;
#else
typedef GpuSceneGlobalIlluminationProbe ObjectType;
#endif

// The objects to test. One thread processes one element.
StructuredBuffer g_objects : register(t0);

#if HZB_TEST
// Depth pyramid and sampler used by the HZB occlusion test.
Texture2D g_hzbTex : register(t1);
SamplerState g_nearestAnyClampSampler : register(s0);
#endif

// Output. Element 0 is the visible object count. The indices of the visible objects follow.
RWStructuredBuffer g_visibleIndices : register(u0);

ANKI_FAST_CONSTANTS(GpuVisibilityNonRenderableConstants, g_consts)

// Running counters shared by all threadgroups of the dispatch. The last threadgroup to finish copies them to the output
// buffers and resets them to zero.
RWStructuredBuffer g_counterBuffer : register(u1);

#if CPU_FEEDBACK
// Element 0 is a count. What follows is an array of (UUID, component array index) pairs.
// NOTE(review): the element type argument is missing in this copy of the file, confirm against the original source.
RWStructuredBuffer g_cpuFeedbackBuffer : register(u2);
#endif

// All getSphere() overloads return a bounding sphere packed as (center.xyz, radius).

// Light: uses the light's position and radius directly.
Vec4 getSphere(GpuSceneLight l)
{
	return Vec4(l.m_position, l.m_radius);
}

// Decal: uses the sphere stored in the decal.
Vec4 getSphere(GpuSceneDecal l)
{
	return Vec4(l.m_sphereCenter, l.m_sphereRadius);
}

// Fog density volume: the two members hold either an AABB (min, max) or a sphere (center, radius in .x), depending on
// m_isBox.
Vec4 getSphere(GpuSceneFogDensityVolume l)
{
	if(l.m_isBox)
	{
		// Sphere around the box: centered at the box center and reaching the max corner
		const Vec3 center = (l.m_aabbMinOrSphereCenter + l.m_aabbMaxOrSphereRadius) / 2.0f;
		const F32 radius = length(l.m_aabbMaxOrSphereRadius - center);
		return Vec4(center, radius);
	}
	else
	{
		return Vec4(l.m_aabbMinOrSphereCenter, l.m_aabbMaxOrSphereRadius.x);
	}
}

// Reflection probe: sphere centered at the AABB center and reaching the max corner.
Vec4 getSphere(GpuSceneReflectionProbe l)
{
	const Vec3 center = (l.m_aabbMin + l.m_aabbMax) / 2.0f;
	const F32 radius = length(center - l.m_aabbMax);
	return Vec4(center, radius);
}

// Global illumination probe: sphere centered at the AABB center and reaching the max corner.
Vec4 getSphere(GpuSceneGlobalIlluminationProbe l)
{
	const Vec3 center = (l.m_aabbMin + l.m_aabbMax) / 2.0f;
	const F32 radius = length(center - l.m_aabbMax);
	return Vec4(center, radius);
}

#define NUMTHREADS 64

// Entry point. Each thread tests the object at index svDispatchThreadId. Objects that pass get their index appended to
// g_visibleIndices and, with CPU_FEEDBACK, their UUID and component array index appended to g_cpuFeedbackBuffer. The last
// threadgroup to finish stores the final counts in element 0 of the output buffers and resets g_counterBuffer.
[numthreads(NUMTHREADS, 1, 1)] void main(U32 svDispatchThreadId : SV_DISPATCHTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
{
	U32 objectCount, unused;
	g_objects.GetDimensions(objectCount, unused);

	// The dispatch is rounded up to whole threadgroups. Threads past the end of the array have nothing to test but must
	// keep running to reach the barrier further down
	Bool skip = (svDispatchThreadId >= objectCount);

	// Frustum test
	//
	Vec4 sphere = 0.0;
	if(!skip)
	{
		sphere = getSphere(g_objects[svDispatchThreadId]);
		skip = !frustumTest(g_consts.m_clipPlanes, sphere.xyz, sphere.w);
	}

#if HZB_TEST
	// HZB test. Done on the AABB that encloses the bounding sphere
	//
	if(!skip)
	{
		const Vec3 aabbMin = sphere.xyz - sphere.w;
		const Vec3 aabbMax = sphere.xyz + sphere.w;

		Vec2 minNdc, maxNdc;
		F32 aabbMinDepth;
		projectAabb(aabbMin, aabbMax, g_consts.m_viewProjectionMat, minNdc, maxNdc, aabbMinDepth);

		if(cullHzb(minNdc, maxNdc, aabbMinDepth, g_hzbTex, g_nearestAnyClampSampler))
		{
			skip = true;
		}
	}
#endif

	// Add the object
	//
	if(!skip)
	{
		U32 idx;
		InterlockedAdd(g_counterBuffer[0].m_visibleObjectCount, 1, idx);

		// +1 because element 0 is reserved for the count
		g_visibleIndices[idx + 1] = svDispatchThreadId;
	}

	// Give feedback to the CPU
	//
#if CPU_FEEDBACK
	// Only touch g_objects for threads that survived the tests. A thread with svDispatchThreadId >= objectCount is always
	// skipped and reading its element would be an out-of-bounds access. Nested ifs are used so that this doesn't depend
	// on && short-circuiting
	if(!skip)
	{
		const Bool doFeedback = g_objects[svDispatchThreadId].m_cpuFeedback;
		if(doFeedback)
		{
			U32 idx;
			InterlockedAdd(g_counterBuffer[0].m_feedbackObjectCount, 1, idx);

			// Two entries per object, stored after the count in element 0
			g_cpuFeedbackBuffer[idx * 2 + 1] = g_objects[svDispatchThreadId].m_uuid;
			g_cpuFeedbackBuffer[idx * 2 + 2] = g_objects[svDispatchThreadId].m_componentArrayIndex;
		}
	}
#endif

	// Sync to make sure all the atomic ops have finished before the following code reads them
	AllMemoryBarrierWithGroupSync();

	// Store the counters to the actual buffers
	//
	if(svGroupIndex == 0)
	{
		// One thread per threadgroup registers that its group is done
		U32 threadgroupIdx;
		InterlockedAdd(g_counterBuffer[0].m_threadgroupCount, 1, threadgroupIdx);

		const U32 threadgroupCount = (objectCount + NUMTHREADS - 1) / NUMTHREADS;
		const Bool lastThreadgroupExecuting = (threadgroupIdx + 1 == threadgroupCount);
		if(lastThreadgroupExecuting)
		{
			// Every other threadgroup has already registered. Publish the totals and zero the counters
			g_visibleIndices[0] = g_counterBuffer[0].m_visibleObjectCount;
			g_counterBuffer[0].m_visibleObjectCount = 0;

#if CPU_FEEDBACK
			g_cpuFeedbackBuffer[0] = g_counterBuffer[0].m_feedbackObjectCount;
			g_counterBuffer[0].m_feedbackObjectCount = 0;
#endif

			g_counterBuffer[0].m_threadgroupCount = 0;
		}
	}
}