|
@@ -11,6 +11,8 @@ const uint U32_MAX = 0xFFFFFFFFU;
|
|
|
const uint WORKGROUP_SIZE_X = 16;
|
|
const uint WORKGROUP_SIZE_X = 16;
|
|
|
const uint WORKGROUP_SIZE_Y = 16;
|
|
const uint WORKGROUP_SIZE_Y = 16;
|
|
|
|
|
|
|
|
|
|
+// Every thread will read more pixels since the workgroup size is less than
|
|
|
|
|
+// the tile size.
|
|
|
const uint PIXEL_READ_X = TILE_SIZE_X / WORKGROUP_SIZE_X;
|
|
const uint PIXEL_READ_X = TILE_SIZE_X / WORKGROUP_SIZE_X;
|
|
|
const uint PIXEL_READ_Y = TILE_SIZE_Y / WORKGROUP_SIZE_Y;
|
|
const uint PIXEL_READ_Y = TILE_SIZE_Y / WORKGROUP_SIZE_Y;
|
|
|
|
|
|
|
@@ -26,19 +28,15 @@ layout(
|
|
|
local_size_y = WORKGROUP_SIZE_Y,
|
|
local_size_y = WORKGROUP_SIZE_Y,
|
|
|
local_size_z = 1) in;
|
|
local_size_z = 1) in;
|
|
|
|
|
|
|
|
-shared uvec2 g_minMaxDepth[PIXEL_READ_Y][PIXEL_READ_X];
|
|
|
|
|
|
|
+shared uint g_minDepth;
|
|
|
|
|
+shared uint g_maxDepth;
|
|
|
|
|
|
|
|
//==============================================================================
|
|
//==============================================================================
|
|
|
void main()
|
|
void main()
|
|
|
{
|
|
{
|
|
|
// Init
|
|
// Init
|
|
|
- for(uint y = 0; y < PIXEL_READ_Y; ++y)
|
|
|
|
|
- {
|
|
|
|
|
- for(uint x = 0; x < PIXEL_READ_X; ++x)
|
|
|
|
|
- {
|
|
|
|
|
- g_minMaxDepth[y][x] = uvec2(U32_MAX, 0U);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ g_minDepth = U32_MAX;
|
|
|
|
|
+ g_maxDepth = 0U;
|
|
|
|
|
|
|
|
memoryBarrierShared();
|
|
memoryBarrierShared();
|
|
|
barrier();
|
|
barrier();
|
|
@@ -47,37 +45,30 @@ void main()
|
|
|
ivec2 coord =
|
|
ivec2 coord =
|
|
|
ivec2(gl_GlobalInvocationID.xy) * ivec2(PIXEL_READ_X, PIXEL_READ_Y);
|
|
ivec2(gl_GlobalInvocationID.xy) * ivec2(PIXEL_READ_X, PIXEL_READ_Y);
|
|
|
|
|
|
|
|
|
|
+ float mind = 10.0;
|
|
|
|
|
+ float maxd = -10.0;
|
|
|
for(uint y = 0; y < PIXEL_READ_Y; ++y)
|
|
for(uint y = 0; y < PIXEL_READ_Y; ++y)
|
|
|
{
|
|
{
|
|
|
for(uint x = 0; x < PIXEL_READ_X; ++x)
|
|
for(uint x = 0; x < PIXEL_READ_X; ++x)
|
|
|
{
|
|
{
|
|
|
float depth = texelFetchOffset(u_depthMap, coord, 0, ivec2(x, y)).r;
|
|
float depth = texelFetchOffset(u_depthMap, coord, 0, ivec2(x, y)).r;
|
|
|
- uint udepth = uint(depth * float(U32_MAX));
|
|
|
|
|
- atomicMin(g_minMaxDepth[y][x].x, udepth);
|
|
|
|
|
- atomicMax(g_minMaxDepth[y][x].y, udepth);
|
|
|
|
|
|
|
+ mind = min(mind, depth);
|
|
|
|
|
+ maxd = max(maxd, depth);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ uvec2 udepth = uvec2(vec2(mind, maxd) * float(U32_MAX));
|
|
|
|
|
+ atomicMin(g_minDepth, udepth.x);
|
|
|
|
|
+ atomicMax(g_maxDepth, udepth.y);
|
|
|
|
|
+
|
|
|
memoryBarrierShared();
|
|
memoryBarrierShared();
|
|
|
barrier();
|
|
barrier();
|
|
|
|
|
|
|
|
// Write result
|
|
// Write result
|
|
|
if(gl_LocalInvocationIndex == 0)
|
|
if(gl_LocalInvocationIndex == 0)
|
|
|
{
|
|
{
|
|
|
- float mind = 1.0;
|
|
|
|
|
- float maxd = 0.0;
|
|
|
|
|
- for(uint y = 0; y < PIXEL_READ_Y; ++y)
|
|
|
|
|
- {
|
|
|
|
|
- for(uint x = 0; x < PIXEL_READ_X; ++x)
|
|
|
|
|
- {
|
|
|
|
|
- vec2 depthLimits = vec2(g_minMaxDepth[y][x]) / float(U32_MAX);
|
|
|
|
|
- mind = min(mind, depthLimits.x);
|
|
|
|
|
- maxd = max(maxd, depthLimits.y);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
uint idx = gl_WorkGroupID.y * TILES_COUNT_X + gl_WorkGroupID.x;
|
|
uint idx = gl_WorkGroupID.y * TILES_COUNT_X + gl_WorkGroupID.x;
|
|
|
- u_depthLimits[idx] = vec2(mind, maxd);
|
|
|
|
|
|
|
+ u_depthLimits[idx] = vec2(g_minDepth, g_maxDepth) / float(U32_MAX);
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|