|
@@ -3,7 +3,7 @@
|
|
|
// Code licensed under the BSD License.
|
|
// Code licensed under the BSD License.
|
|
|
// http://www.anki3d.org/LICENSE
|
|
// http://www.anki3d.org/LICENSE
|
|
|
|
|
|
|
|
-#pragma anki input const UVec2 INPUT_TEX_SIZE
|
|
|
|
|
|
|
+ANKI_SPECIALIZATION_CONSTANT_UVEC2(INPUT_TEX_SIZE, 0, UVec2(1));
|
|
|
|
|
|
|
|
#pragma anki start comp
|
|
#pragma anki start comp
|
|
|
#define LOG_AVG 0
|
|
#define LOG_AVG 0
|
|
@@ -11,16 +11,13 @@
|
|
|
#include <shaders/Common.glsl>
|
|
#include <shaders/Common.glsl>
|
|
|
#include <shaders/Tonemapping.glsl>
|
|
#include <shaders/Tonemapping.glsl>
|
|
|
|
|
|
|
|
-const UVec2 PREFERABLE_WG_SIZE = UVec2(32u, 32u);
|
|
|
|
|
-
|
|
|
|
|
-// Find a WG size that is dividable to the input tex size
|
|
|
|
|
-const UVec2 WORKGROUP_SIZE = INPUT_TEX_SIZE / ((INPUT_TEX_SIZE + PREFERABLE_WG_SIZE - 1u) / PREFERABLE_WG_SIZE);
|
|
|
|
|
-const U32 WG_SIZE = WORKGROUP_SIZE.x * WORKGROUP_SIZE.y;
|
|
|
|
|
-
|
|
|
|
|
-const UVec2 TRIMMED_INPUT_TEX_SIZE = (INPUT_TEX_SIZE / WORKGROUP_SIZE) * WORKGROUP_SIZE;
|
|
|
|
|
-
|
|
|
|
|
|
|
+const UVec2 WORKGROUP_SIZE = UVec2(32u, 32u);
|
|
|
layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
|
|
layout(local_size_x = WORKGROUP_SIZE.x, local_size_y = WORKGROUP_SIZE.y, local_size_z = 1) in;
|
|
|
|
|
|
|
|
|
|
+// Align the tex size to workgroup size
|
|
|
|
|
+const UVec2 ALIGNED_INPUT_TEX_SIZE = WORKGROUP_SIZE * ((INPUT_TEX_SIZE + WORKGROUP_SIZE - 1u) / WORKGROUP_SIZE);
|
|
|
|
|
+const UVec2 PIXELS_PER_TILE = ALIGNED_INPUT_TEX_SIZE / WORKGROUP_SIZE;
|
|
|
|
|
+
|
|
|
layout(set = 0, binding = 0) uniform texture2D u_tex;
|
|
layout(set = 0, binding = 0) uniform texture2D u_tex;
|
|
|
|
|
|
|
|
#define TONEMAPPING_RESOURCE_AS_BUFFER 1
|
|
#define TONEMAPPING_RESOURCE_AS_BUFFER 1
|
|
@@ -28,25 +25,27 @@ layout(set = 0, binding = 0) uniform texture2D u_tex;
|
|
|
#define TONEMAPPING_BINDING 1
|
|
#define TONEMAPPING_BINDING 1
|
|
|
#include <shaders/TonemappingResources.glsl>
|
|
#include <shaders/TonemappingResources.glsl>
|
|
|
|
|
|
|
|
-shared F32 s_avgLum[WG_SIZE];
|
|
|
|
|
|
|
+shared F32 s_avgLum[WORKGROUP_SIZE.x * WORKGROUP_SIZE.y];
|
|
|
|
|
|
|
|
void main()
|
|
void main()
|
|
|
{
|
|
{
|
|
|
// Gather the log-average luminance of a tile. It will miss some pixels but not too many
|
|
// Gather the log-average luminance of a tile. It will miss some pixels but not too many
|
|
|
- const U32 PIXEL_READ_X = TRIMMED_INPUT_TEX_SIZE.x / WORKGROUP_SIZE.x;
|
|
|
|
|
- const U32 PIXEL_READ_Y = TRIMMED_INPUT_TEX_SIZE.y / WORKGROUP_SIZE.y;
|
|
|
|
|
-
|
|
|
|
|
- const U32 yStart = gl_LocalInvocationID.y * PIXEL_READ_Y;
|
|
|
|
|
- const U32 xStart = gl_LocalInvocationID.x * PIXEL_READ_X;
|
|
|
|
|
|
|
+ const U32 yStart = gl_LocalInvocationID.y * PIXELS_PER_TILE.y;
|
|
|
|
|
+ const U32 xStart = gl_LocalInvocationID.x * PIXELS_PER_TILE.x;
|
|
|
|
|
|
|
|
F32 avgLum = 0.0;
|
|
F32 avgLum = 0.0;
|
|
|
- ANKI_UNROLL for(U32 y = 0; y < PIXEL_READ_Y; ++y)
|
|
|
|
|
|
|
+ ANKI_UNROLL for(U32 y = 0; y < PIXELS_PER_TILE.y; ++y)
|
|
|
{
|
|
{
|
|
|
- ANKI_UNROLL for(U32 x = 0; x < PIXEL_READ_X; ++x)
|
|
|
|
|
|
|
+ ANKI_UNROLL for(U32 x = 0; x < PIXELS_PER_TILE.x; ++x)
|
|
|
{
|
|
{
|
|
|
- IVec2 uv = IVec2(xStart, yStart) + IVec2(x, y);
|
|
|
|
|
- Vec3 color = texelFetch(u_tex, uv, 0).rgb;
|
|
|
|
|
- F32 lum = computeLuminance(color);
|
|
|
|
|
|
|
+ const UVec2 uv = UVec2(xStart, yStart) + UVec2(x, y);
|
|
|
|
|
+ if(uv.x >= INPUT_TEX_SIZE.x || uv.y >= INPUT_TEX_SIZE.y)
|
|
|
|
|
+ {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ const Vec3 color = texelFetch(u_tex, IVec2(uv), 0).rgb;
|
|
|
|
|
+ const F32 lum = computeLuminance(color);
|
|
|
#if LOG_AVG
|
|
#if LOG_AVG
|
|
|
avgLum += log(max(EPSILON, lum));
|
|
avgLum += log(max(EPSILON, lum));
|
|
|
#else
|
|
#else
|
|
@@ -61,7 +60,7 @@ void main()
|
|
|
barrier();
|
|
barrier();
|
|
|
|
|
|
|
|
// Gather the results into one
|
|
// Gather the results into one
|
|
|
- ANKI_LOOP for(U32 s = WG_SIZE / 2u; s > 0u; s >>= 1u)
|
|
|
|
|
|
|
+ ANKI_LOOP for(U32 s = (WORKGROUP_SIZE.x * WORKGROUP_SIZE.y) / 2u; s > 0u; s >>= 1u)
|
|
|
{
|
|
{
|
|
|
if(gl_LocalInvocationIndex < s)
|
|
if(gl_LocalInvocationIndex < s)
|
|
|
{
|
|
{
|
|
@@ -76,9 +75,9 @@ void main()
|
|
|
ANKI_BRANCH if(gl_LocalInvocationIndex == 0u)
|
|
ANKI_BRANCH if(gl_LocalInvocationIndex == 0u)
|
|
|
{
|
|
{
|
|
|
#if LOG_AVG
|
|
#if LOG_AVG
|
|
|
- const F32 crntLum = exp(s_avgLum[0] * (1.0 / F32(TRIMMED_INPUT_TEX_SIZE.x * TRIMMED_INPUT_TEX_SIZE.y)));
|
|
|
|
|
|
|
+ const F32 crntLum = exp(s_avgLum[0] * (1.0 / F32(INPUT_TEX_SIZE.x * INPUT_TEX_SIZE.y)));
|
|
|
#else
|
|
#else
|
|
|
- const F32 crntLum = s_avgLum[0] * (1.0 / F32(TRIMMED_INPUT_TEX_SIZE.x * TRIMMED_INPUT_TEX_SIZE.y));
|
|
|
|
|
|
|
+ const F32 crntLum = s_avgLum[0] * (1.0 / F32(INPUT_TEX_SIZE.x * INPUT_TEX_SIZE.y));
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
#if 1
|
|
#if 1
|