浏览代码

Merge pull request #110060 from BlueCube3310/betsy-rgb

Betsy: Convert RGB to RGBA on the GPU for faster compression
Thaddeus Crews 5 天之前
父节点
当前提交
70a2ce974b
共有 4 个文件被更改,包括 254 次插入和 24 次删除
  1. 1 0
      modules/betsy/SCsub
  2. 119 24
      modules/betsy/image_compress_betsy.cpp
  3. 10 0
      modules/betsy/image_compress_betsy.h
  4. 124 0
      modules/betsy/rgb_to_rgba.glsl

+ 1 - 0
modules/betsy/SCsub

@@ -11,6 +11,7 @@ env_betsy.GLSL_HEADER("bc6h.glsl")
 env_betsy.GLSL_HEADER("bc1.glsl")
 env_betsy.GLSL_HEADER("bc4.glsl")
 env_betsy.GLSL_HEADER("alpha_stitch.glsl")
+env_betsy.GLSL_HEADER("rgb_to_rgba.glsl")
 
 env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])
 

+ 119 - 24
modules/betsy/image_compress_betsy.cpp

@@ -38,6 +38,7 @@
 #include "bc1.glsl.gen.h"
 #include "bc4.glsl.gen.h"
 #include "bc6h.glsl.gen.h"
+#include "rgb_to_rgba.glsl.gen.h"
 #include "servers/display/display_server.h"
 
 static Mutex betsy_mutex;
@@ -220,6 +221,44 @@ void BetsyCompressor::_init() {
 		cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled);
 		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline.is_null());
 	}
+
+	{
+		Ref<RDShaderFile> rgb_to_rgba_shader;
+		rgb_to_rgba_shader.instantiate();
+		Error err = rgb_to_rgba_shader->parse_versions_from_text(rgb_to_rgba_shader_glsl);
+
+		if (err != OK) {
+			rgb_to_rgba_shader->print_errors("Betsy RGB to RGBA shader");
+		}
+
+		// Float32.
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_float"));
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled.is_null());
+
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled);
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline.is_null());
+
+		// Float16.
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_half"));
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled.is_null());
+
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled);
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline.is_null());
+
+		// Unorm8.
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm8"));
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled.is_null());
+
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled);
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline.is_null());
+
+		// Unorm16.
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm16"));
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled.is_null());
+
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled);
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline.is_null());
+	}
 }
 
 void BetsyCompressor::init() {
@@ -284,7 +323,9 @@ static int get_next_multiple(int n, int m) {
 	return n + (m - (n % m));
 }
 
-static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
+static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format, bool &r_is_rgb) {
+	r_is_rgb = false;
+
 	switch (r_img->get_format()) {
 		case Image::FORMAT_L8:
 			r_img->convert(Image::FORMAT_RGBA8);
@@ -305,7 +346,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			break;
 
 		case Image::FORMAT_RGB8:
-			r_img->convert(Image::FORMAT_RGBA8);
+			r_is_rgb = true;
 			r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
 			break;
 
@@ -322,7 +363,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			break;
 
 		case Image::FORMAT_RGBH:
-			r_img->convert(Image::FORMAT_RGBAH);
+			r_is_rgb = true;
 			r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
 			break;
 
@@ -339,7 +380,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			break;
 
 		case Image::FORMAT_RGBF:
-			r_img->convert(Image::FORMAT_RGBAF);
+			r_is_rgb = true;
 			r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
 			break;
 
@@ -360,7 +401,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			break;
 
 		case Image::FORMAT_RGB16:
-			r_img->convert(Image::FORMAT_RGBA16);
+			r_is_rgb = true;
 			r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
 			break;
 
@@ -368,23 +409,6 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
 			break;
 
-		case Image::FORMAT_R16I:
-			r_format = RD::DATA_FORMAT_R16_UINT;
-			break;
-
-		case Image::FORMAT_RG16I:
-			r_format = RD::DATA_FORMAT_R16G16_UINT;
-			break;
-
-		case Image::FORMAT_RGB16I:
-			r_img->convert(Image::FORMAT_RGBA16I);
-			r_format = RD::DATA_FORMAT_R16G16B16A16_UINT;
-			break;
-
-		case Image::FORMAT_RGBA16I:
-			r_format = RD::DATA_FORMAT_R16G16B16A16_UINT;
-			break;
-
 		default: {
 			return ERR_UNAVAILABLE;
 		}
@@ -445,7 +469,8 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
 		src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
 	}
 
-	err = get_src_texture_format(r_img, src_texture_format.format);
+	bool needs_rgb_to_rgba = false;
+	err = get_src_texture_format(r_img, src_texture_format.format, needs_rgb_to_rgba);
 
 	if (err != OK) {
 		return err;
@@ -546,9 +571,79 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
 		}
 
 		// Create the textures on the GPU.
-		RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
+		RID src_texture;
 		RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView());
 
+		if (needs_rgb_to_rgba) {
+			// RGB textures cannot be sampled directly on most hardware, so we do a little trick involving a compute shader
+			// which takes the input data as an SSBO and converts it directly into an RGBA image.
+			BetsyShaderType rgb_shader_type = BETSY_SHADER_MAX;
+
+			switch (r_img->get_format()) {
+				case Image::FORMAT_RGB8:
+					rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM8;
+					break;
+				case Image::FORMAT_RGBH:
+					rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_HALF;
+					break;
+				case Image::FORMAT_RGBF:
+					rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_FLOAT;
+					break;
+				case Image::FORMAT_RGB16:
+					rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM16;
+					break;
+				default:
+					break;
+			}
+
+			// The source 'RGB' buffer.
+			RID source_buffer = compress_rd->storage_buffer_create(src_image_ptr[0].size(), src_image_ptr[0].span());
+
+			RD::TextureFormat rgba_texture_format = src_texture_format;
+			rgba_texture_format.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
+			src_texture = compress_rd->texture_create(rgba_texture_format, RD::TextureView());
+
+			Vector<RD::Uniform> uniforms;
+			{
+				{
+					RD::Uniform u;
+					u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+					u.binding = 0;
+					u.append_id(source_buffer);
+					uniforms.push_back(u);
+				}
+				{
+					RD::Uniform u;
+					u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+					u.binding = 1;
+					u.append_id(src_texture);
+					uniforms.push_back(u);
+				}
+			}
+
+			BetsyShader &rgb_shader = cached_shaders[rgb_shader_type];
+
+			RID uniform_set = compress_rd->uniform_set_create(uniforms, rgb_shader.compiled, 0);
+			RD::ComputeListID compute_list = compress_rd->compute_list_begin();
+
+			compress_rd->compute_list_bind_compute_pipeline(compute_list, rgb_shader.pipeline);
+			compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
+
+			// Prepare the push constant with the mipmap's resolution.
+			RGBToRGBAPushConstant push_constant;
+			push_constant.width = width;
+			push_constant.height = height;
+
+			compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RGBToRGBAPushConstant));
+			compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 8) / 8, get_next_multiple(height, 8) / 8, 1);
+
+			compress_rd->compute_list_end();
+
+			compress_rd->free_rid(source_buffer);
+		} else {
+			src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
+		}
+
 		{
 			Vector<RD::Uniform> uniforms;
 			{

+ 10 - 0
modules/betsy/image_compress_betsy.h

@@ -66,6 +66,10 @@ enum BetsyShaderType {
 	BETSY_SHADER_BC6_SIGNED,
 	BETSY_SHADER_BC6_UNSIGNED,
 	BETSY_SHADER_ALPHA_STITCH,
+	BETSY_SHADER_RGB_TO_RGBA_FLOAT,
+	BETSY_SHADER_RGB_TO_RGBA_HALF,
+	BETSY_SHADER_RGB_TO_RGBA_UNORM8,
+	BETSY_SHADER_RGB_TO_RGBA_UNORM16,
 	BETSY_SHADER_MAX,
 };
 
@@ -85,6 +89,12 @@ struct BC4PushConstant {
 	uint32_t padding[3] = { 0 };
 };
 
+// Push constant block for the RGB -> RGBA conversion compute shader; mirrors `Params` in rgb_to_rgba.glsl.
+// Every member is zero-initialized (as BC4PushConstant does for its padding) so that no indeterminate
+// bytes are uploaded when the whole struct is handed to compute_list_set_push_constant().
+struct RGBToRGBAPushConstant {
+	uint32_t width = 0; // Width of the mipmap being converted, in texels.
+	uint32_t height = 0; // Height of the mipmap being converted, in texels.
+	uint32_t padding[2] = { 0 }; // Unused; matches p_padding[2] on the shader side.
+};
+
 void free_device();
 
 Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels);

+ 124 - 0
modules/betsy/rgb_to_rgba.glsl

@@ -0,0 +1,124 @@
+#[versions]
+
+version_float = "#define VER_FLOAT";
+version_half = "#define VER_HALF";
+version_unorm8 = "#define VER_UINT8";
+version_unorm16 = "#define VER_UINT16";
+
+#[compute]
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; // 8x8 work groups, one invocation per texel.
+
+layout(std430, binding = 0) buffer Source { // Raw RGB source texels (binding 0).
+#if defined(VER_FLOAT)
+	float data[]; // One 32-bit float per component.
+#else
+	uint data[]; // Packed 8-bit or 16-bit components; unpacked in main().
+#endif
+}
+source;
+
+#if defined(VER_FLOAT)
+layout(binding = 1, rgba32f) uniform writeonly image2D dest; // RGBA output image (binding 1).
+#elif defined(VER_HALF)
+layout(binding = 1, rgba16f) uniform writeonly image2D dest; // RGBA output image (binding 1).
+#elif defined(VER_UINT8)
+layout(binding = 1, rgba8) uniform writeonly image2D dest; // RGBA output image (binding 1).
+#elif defined(VER_UINT16)
+layout(binding = 1, rgba16) uniform writeonly image2D dest; // RGBA output image (binding 1).
+#endif
+
+layout(push_constant, std430) uniform Params {
+	uint p_width; // Image width in texels; used for bounds checking and row addressing.
+	uint p_height; // Image height in texels; used for bounds checking.
+	uint p_padding[2]; // Not read by the shader.
+}
+params;
+
+void main() {
+	// gl_GlobalInvocationID is equivalent to the current texel coordinates.
+	if (gl_GlobalInvocationID.x >= params.p_width || gl_GlobalInvocationID.y >= params.p_height) {
+		return; // Outside the image: the dispatch is rounded up to whole 8x8 work groups.
+	}
+
+	// The row-major index of a texel in the source buffer, NOT an index into source.data[].
+	const int texel_index = int(gl_GlobalInvocationID.y * params.p_width + gl_GlobalInvocationID.x);
+
+#if defined(VER_FLOAT)
+	// Since 32-bit floats are aligned with RGBF texel data, just retrieve the values from the array.
+	// Multiply by 3 (the number of components per texel) to get the index of the texel's first component.
+
+	int data_index = texel_index * 3;
+	vec3 color_rgb = vec3(source.data[data_index], source.data[data_index + 1], source.data[data_index + 2]);
+
+#elif defined(VER_UINT8)
+	// RGB8 texel data and 32-bit uints are not aligned, so we have to use a bit of magic.
+	// The source texel can be in one of 4 alignment 'states' (the byte offset of the texel within a uint):
+	// 0 - [ XYZ_-____ ]
+	// 1 - [ _YZW-____ ]
+	// 2 - [ __ZW-X___ ]
+	// 3 - [ ___W-XY__ ]
+	// data_index is the first uint that starts at or after the texel's first byte, i.e. ceil(texel_index * 3 / 4); it falls one behind texel_index after every 'cycle' of 4 texels.
+
+	vec3 color_rgb = vec3(0.0);
+	int data_index = texel_index - (texel_index / 4);
+
+	switch ((texel_index * 3) % 4) {
+		case 0: // Texel occupies the first three bytes of one uint.
+			color_rgb = unpackUnorm4x8(source.data[data_index]).xyz;
+			break;
+		case 1: // Texel occupies the last three bytes of the previous uint.
+			color_rgb = unpackUnorm4x8(source.data[data_index - 1]).yzw;
+			break;
+		case 2: // Two bytes in the previous uint, one in the current uint.
+			color_rgb.rg = unpackUnorm4x8(source.data[data_index - 1]).zw;
+			color_rgb.b = unpackUnorm4x8(source.data[data_index]).x;
+			break;
+		case 3: // One byte in the previous uint, two in the current uint.
+			color_rgb.r = unpackUnorm4x8(source.data[data_index - 1]).w;
+			color_rgb.gb = unpackUnorm4x8(source.data[data_index]).xy;
+			break;
+		default:
+			break;
+	}
+
+#else
+	// In a similar vein to RGB8, the RGBH/RGB16 source texel can be in one of 2 alignment 'states':
+	// 0 - [ XY-X_ ]
+	// 1 - [ _Y-XY ]
+	// data_index is the uint holding the texel's first component, i.e. floor(texel_index * 3 / 2); it advances faster than texel_index, as the size of a texel (6 bytes) is greater than that of a 32-bit uint (4 bytes).
+
+	vec3 color_rgb = vec3(0.0);
+	int data_index = texel_index + (texel_index / 2);
+
+	switch ((texel_index * 3) % 2) {
+#if defined(VER_HALF)
+		case 0: // Texel starts on a uint boundary.
+			color_rgb.xy = unpackHalf2x16(source.data[data_index]);
+			color_rgb.z = unpackHalf2x16(source.data[data_index + 1]).x;
+			break;
+		case 1: // Texel starts in the upper half of a uint.
+			color_rgb.x = unpackHalf2x16(source.data[data_index]).y;
+			color_rgb.yz = unpackHalf2x16(source.data[data_index + 1]);
+			break;
+#elif defined(VER_UINT16)
+		case 0: // Texel starts on a uint boundary.
+			color_rgb.xy = unpackUnorm2x16(source.data[data_index]);
+			color_rgb.z = unpackUnorm2x16(source.data[data_index + 1]).x;
+			break;
+		case 1: // Texel starts in the upper half of a uint.
+			color_rgb.x = unpackUnorm2x16(source.data[data_index]).y;
+			color_rgb.yz = unpackUnorm2x16(source.data[data_index + 1]);
+			break;
+#endif
+		default:
+			break;
+	}
+#endif
+
+	// Store the resulting RGBA color, with the alpha channel set to 1.0.
+	imageStore(dest, ivec2(gl_GlobalInvocationID.xy), vec4(color_rgb, 1.0));
+}