// Copyright (C) 2009-present, Panagiotis Christopoulos Charitos and contributors.
// All rights reserved.
// Code licensed under the BSD License.
// http://www.anki3d.org/LICENSE

#pragma anki technique comp

// NOTE(review): the targets of the #include directives below are not visible in this copy of the file. They are kept
// exactly as found; confirm them against the repository before merging.
#include
#include

// Dimensions of the single thread group that processes the whole input texture.
#define THREAD_COUNT_X 32u
#define THREAD_COUNT_Y 16u
#define THREAD_COUNT UVec2(THREAD_COUNT_X, THREAD_COUNT_Y)

// Input texture whose average luminance is computed.
Texture2D g_tex : register(t0);

#define TONEMAPPING_REGISTER u0
#include

// One partial luminance sum per thread of the group.
groupshared F32 s_avgLum[THREAD_COUNT_X * THREAD_COUNT_Y];

// Computes the average luminance of g_tex and stores it together with the derived exposure.
//
// Every thread accumulates the luminance of its own rectangular tile of the texture, the per-thread partial sums are
// then reduced through groupshared memory, and thread 0 blends the result with the previously stored value before
// writing it out.
//
// svGroupThreadId: 2D index of the thread inside the group, selects the tile this thread reads.
// svGroupIndex:    Flattened index of the thread inside the group, selects its slot in s_avgLum.
[numthreads(THREAD_COUNT_X, THREAD_COUNT_Y, 1)] void main(UVec3 svGroupThreadId : SV_GROUPTHREADID, U32 svGroupIndex : SV_GROUPINDEX)
{
	UVec2 inputTexSize;
	g_tex.GetDimensions(inputTexSize.x, inputTexSize.y);

	// Align the tex size to workgroup size so that the tiles cover the whole texture
	const UVec2 alignedInputTexSize = THREAD_COUNT * ((inputTexSize + THREAD_COUNT - 1u) / THREAD_COUNT);
	const UVec2 pixelsPerTile = alignedInputTexSize / THREAD_COUNT;

	// Tile owned by this thread. The end is clamped to the real texture size so that the padding introduced by the
	// alignment above is never read. A tile that lies completely in the padding yields an empty range
	const UVec2 tileBegin = svGroupThreadId.xy * pixelsPerTile;
	const UVec2 tileEnd = min(tileBegin + pixelsPerTile, inputTexSize);

	// Every sample is pre-multiplied by 1/pixelCount, which makes the sum of all the partial sums the average
	const F32 weight = (1.0 / F32(inputTexSize.x * inputTexSize.y));

	// Accumulate the weighted luminance of the tile
	F32 avgLum = 0.0;
	for(U32 y = tileBegin.y; y < tileEnd.y; ++y)
	{
		for(U32 x = tileBegin.x; x < tileEnd.x; ++x)
		{
			const Vec3 color = g_tex.Load(IVec3(UVec2(x, y), 0)).rgb;
			const F32 lum = computeLuminance(color);
			avgLum += lum * weight;
		}
	}

	s_avgLum[svGroupIndex] = avgLum;

	GroupMemoryBarrierWithGroupSync();

	// Gather the results into one. Each iteration folds the upper half of the active range onto the lower half
	[loop] for(U32 s = (THREAD_COUNT_X * THREAD_COUNT_Y) / 2u; s > 0u; s >>= 1u)
	{
		if(svGroupIndex < s)
		{
			s_avgLum[svGroupIndex] += s_avgLum[svGroupIndex + s];
		}

#if ANKI_PLATFORM_MOBILE
		// NOTE(review): the barrier is skipped as soon as the stride no longer exceeds the wave size. This presumably
		// relies on the remaining active threads belonging to one wave and on their groupshared writes being visible
		// to each other without a barrier. Confirm that this holds on every supported mobile GPU
		if(s > WaveGetLaneCount())
		{
			GroupMemoryBarrierWithGroupSync();
		}
#else
		GroupMemoryBarrierWithGroupSync();
#endif
	}

	// Write the result
	[branch] if(svGroupIndex == 0u)
	{
		const F32 crntLum = s_avgLum[0];

#if 1
		const F32 prevLum = readExposureAndAverageLuminance().y;

		// Lerp between previous and new L value
		const F32 interpolationFactor = 0.05;
		F32 finalAvgLum = lerp(prevLum, crntLum, interpolationFactor);
#else
		F32 finalAvgLum = crntLum;
#endif

		// This is a workaround because sometimes the avg lum becomes nan
		finalAvgLum = clamp(finalAvgLum, kEpsilonF32, kMaxF32);

		writeExposureAndAverageLuminance(computeExposure(finalAvgLum, 0.0f), finalAvgLum);
	}
}