/*
 * Copyright (c) Contributors to the Open 3D Engine Project.
 * For complete copyright and license terms please see the LICENSE at the root of this distribution.
 *
 * SPDX-License-Identifier: Apache-2.0 OR MIT
 *
 */

// NOTE(review): the include target is not visible in this view of the file -- restore it from
// the original source before building.
#include

// Resources bound for this dispatch: the texture that is read and the texture that receives
// one value per thread group.
// NOTE(review): no element types are visible on these declarations in this view; the entry point
// below stores each read into a single float and writes a single float per group -- confirm the
// declared formats against the original file.
ShaderResourceGroup TexturesSrg : SRG_PerObject
{
    RWTexture2D m_outputTexture;
    Texture2D m_inputTexture;
};

// Edge length, in threads, of the square thread group.
#define ThreadGroupSize 8

// Total number of threads in one group (ThreadGroupSize squared).
const static uint NumThreads = ThreadGroupSize * ThreadGroupSize;

// One float4 slot per thread, used as scratch space while the group sums its samples.
groupshared static float4 s_threadGroupSharedMem[NumThreads];

// Averages a (2 * ThreadGroupSize) x (2 * ThreadGroupSize) tile of the input texture and writes
// the result to the output texel addressed by the group ID.
//
// Each thread reads a 2x2 quad of input texels, the group sums all quads through group-shared
// memory, and the thread with flattened index 0 stores the mean.
//
// groupID       - thread group coordinate; selects both the input tile and the output texel.
// groupThreadID - thread coordinate inside the group; selects the 2x2 quad inside the tile.
// groupIndex    - flattened thread index inside the group; selects the shared-memory slot.
//
// NOTE(review): reads are not bounds-checked, so this presumably relies on the input dimensions
// covering whole tiles -- confirm against the pass that dispatches this shader.
[numthreads(ThreadGroupSize, ThreadGroupSize, 1)]
void MainCS(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID, uint groupIndex : SV_GroupIndex)
{
    const uint slot = groupIndex;

    // Top-left texel of the 2x2 quad owned by this thread.
    const uint2 threadCoord = groupID.xy * ThreadGroupSize + groupThreadID.xy;
    const uint2 quadOrigin = threadCoord * 2;

    // Fetch the four texels of the quad; they are packed in x, y, z, w order below.
    const float texel00 = TexturesSrg::m_inputTexture[quadOrigin + uint2(0, 0)];
    const float texel10 = TexturesSrg::m_inputTexture[quadOrigin + uint2(1, 0)];
    const float texel01 = TexturesSrg::m_inputTexture[quadOrigin + uint2(0, 1)];
    const float texel11 = TexturesSrg::m_inputTexture[quadOrigin + uint2(1, 1)];

    s_threadGroupSharedMem[slot] = float4(texel00, texel10, texel01, texel11);

    // Every slot must be written before any thread starts adding its neighbour's slot.
    GroupMemoryBarrierWithGroupSync();

    // Halve the number of active threads on every pass; each active thread folds the slot
    // `stride` entries above its own into its own slot.
    for (uint stride = NumThreads >> 1; stride != 0; stride >>= 1)
    {
        if (slot < stride)
        {
            s_threadGroupSharedMem[slot] += s_threadGroupSharedMem[slot + stride];
        }

        // Executed by all threads, including idle ones, so the next pass reads completed sums.
        GroupMemoryBarrierWithGroupSync();
    }

    // Only the first thread of the group writes the result; no barriers follow this point.
    if (slot != 0)
    {
        return;
    }

    const float4 groupTotal = s_threadGroupSharedMem[0];

    // Four texels were read per thread.
    const float sampleCount = NumThreads * 4;

    const float texelSum = groupTotal.x + groupTotal.y + groupTotal.z + groupTotal.w;
    TexturesSrg::m_outputTexture[groupID.xy] = texelSum / sampleCount;
}