瀏覽代碼

Tiler min/max is now a compute job

Panagiotis Christopoulos Charitos 10 年之前
父節點
當前提交
832689e9d5
共有 2 個文件被更改，包括 15 次插入和 61 次刪除
  1. 0 37
      shaders/PpsSslr.frag.glsl
  2. 15 24
      shaders/TilerMinMax.comp.glsl

+ 0 - 37
shaders/PpsSslr.frag.glsl

@@ -162,41 +162,4 @@ void main()
 		}
 	}
 #endif
-
-#if 0
-	vec3 pos = p0;
-	for(int i = 0; i < 200; i++)
-	{
-		pos += r * 0.1;
-
-		vec3 posNdc = project(pos);
-
-		vec2 comp = abs(posNdc.xy);
-		if(comp.x > 1.0 || comp.y > 1.0)
-		{
-			return;
-		}
-
-		vec3 texCoord = posNdc.xyz * 0.5 + 0.5;
-
-		float depth = textureRt(u_msDepthRt, texCoord.xy).r;
-
-		float diffDepth = texCoord.z - depth;
-
-		if(diffDepth > 0.0)
-		{
-			if(diffDepth > 0.001)
-			{
-				break;
-			}
-
-			float factor = 1.0 - length(posNdc.xy);
-
-			out_color = 
-				textureRt(u_isRt, texCoord.xy).rgb * (factor * specColor);
-			//out_color = vec3(diffDepth);
-			return;
-		}
-	}
-#endif
 }

+ 15 - 24
shaders/TilerMinMax.comp.glsl

@@ -11,6 +11,8 @@ const uint U32_MAX = 0xFFFFFFFFU;
 const uint WORKGROUP_SIZE_X = 16;
 const uint WORKGROUP_SIZE_Y = 16;
 
+// Every thread will read more pixels since the workgroup size is less than
+// the tile size.
 const uint PIXEL_READ_X = TILE_SIZE_X / WORKGROUP_SIZE_X;
 const uint PIXEL_READ_Y = TILE_SIZE_Y / WORKGROUP_SIZE_Y;
 
@@ -26,19 +28,15 @@ layout(
 	local_size_y = WORKGROUP_SIZE_Y, 
 	local_size_z = 1) in;
 
-shared uvec2 g_minMaxDepth[PIXEL_READ_Y][PIXEL_READ_X];
+shared uint g_minDepth;
+shared uint g_maxDepth;
 
 //==============================================================================
 void main()
 {
 	// Init
-	for(uint y = 0; y < PIXEL_READ_Y; ++y)
-	{
-		for(uint x = 0; x < PIXEL_READ_X; ++x)
-		{
-			g_minMaxDepth[y][x] = uvec2(U32_MAX, 0U);
-		}
-	}
+	g_minDepth = U32_MAX;
+	g_maxDepth = 0U;
 
 	memoryBarrierShared();
 	barrier();
@@ -47,37 +45,30 @@ void main()
 	ivec2 coord = 
 		ivec2(gl_GlobalInvocationID.xy) * ivec2(PIXEL_READ_X, PIXEL_READ_Y);
 
+	float mind = 10.0;
+	float maxd = -10.0;
 	for(uint y = 0; y < PIXEL_READ_Y; ++y)
 	{
 		for(uint x = 0; x < PIXEL_READ_X; ++x)
 		{
 			float depth = texelFetchOffset(u_depthMap, coord, 0, ivec2(x, y)).r;
-			uint udepth = uint(depth * float(U32_MAX));
-			atomicMin(g_minMaxDepth[y][x].x, udepth);
-			atomicMax(g_minMaxDepth[y][x].y, udepth);
+			mind = min(mind, depth);
+			maxd = max(maxd, depth);
 		}
 	}
 
+	uvec2 udepth = uvec2(vec2(mind, maxd) * float(U32_MAX));
+	atomicMin(g_minDepth, udepth.x);
+	atomicMax(g_maxDepth, udepth.y);
+
 	memoryBarrierShared();
 	barrier();
 
 	// Write result
 	if(gl_LocalInvocationIndex == 0)
 	{
-		float mind = 1.0;
-		float maxd = 0.0;
-		for(uint y = 0; y < PIXEL_READ_Y; ++y)
-		{
-			for(uint x = 0; x < PIXEL_READ_X; ++x)
-			{
-				vec2 depthLimits = vec2(g_minMaxDepth[y][x]) / float(U32_MAX);
-				mind = min(mind, depthLimits.x);
-				maxd = max(maxd, depthLimits.y);
-			}
-		}
-
 		uint idx = gl_WorkGroupID.y * TILES_COUNT_X + gl_WorkGroupID.x;
-		u_depthLimits[idx] = vec2(mind, maxd);
+		u_depthLimits[idx] = vec2(g_minDepth, g_maxDepth) / float(U32_MAX);
 	}
 }