Quellcode durchsuchen

Betsy: Remove OGRE aliases

BlueCube3310 vor 5 Monaten
Ursprung
Commit
affa27d188

+ 0 - 75
modules/betsy/CrossPlatformSettings_piece_all.glsl

@@ -1,75 +0,0 @@
-#define min3(a, b, c) min(a, min(b, c))
-#define max3(a, b, c) max(a, max(b, c))
-
-#define float2 vec2
-#define float3 vec3
-#define float4 vec4
-
-#define int2 ivec2
-#define int3 ivec3
-#define int4 ivec4
-
-#define uint2 uvec2
-#define uint3 uvec3
-#define uint4 uvec4
-
-#define float2x2 mat2
-#define float3x3 mat3
-#define float4x4 mat4
-#define ogre_float4x3 mat3x4
-
-#define ushort uint
-#define ushort3 uint3
-#define ushort4 uint4
-
-//Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal
-#define rshort int
-#define rshort2 int2
-#define rint int
-//Short used for write operations. It's an int in GLSL. An ushort in HLSL & Metal
-#define wshort2 int2
-#define wshort3 int3
-
-#define toFloat3x3(x) mat3(x)
-#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2)
-
-#define mul(x, y) ((x) * (y))
-#define saturate(x) clamp((x), 0.0, 1.0)
-#define lerp mix
-#define rsqrt inversesqrt
-#define INLINE
-#define NO_INTERPOLATION_PREFIX flat
-#define NO_INTERPOLATION_SUFFIX
-
-#define PARAMS_ARG_DECL
-#define PARAMS_ARG
-
-#define reversebits bitfieldReverse
-
-#define OGRE_Sample(tex, sampler, uv) texture(tex, uv)
-#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod)
-#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx))
-#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod)
-#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod)
-#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy)
-#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy)
-#define OGRE_ddx(val) dFdx(val)
-#define OGRE_ddy(val) dFdy(val)
-#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod)
-#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod)
-#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample)
-
-#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod)
-
-#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0)
-#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1)
-#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2)
-
-#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x
-
-#define OGRE_SAMPLER_ARG_DECL(samplerName)
-#define OGRE_SAMPLER_ARG(samplerName)
-
-#define OGRE_Texture3D_float4 sampler3D
-#define OGRE_OUT_REF(declType, variableName) out declType variableName
-#define OGRE_INOUT_REF(declType, variableName) inout declType variableName

+ 5 - 7
modules/betsy/alpha_stitch.glsl

@@ -1,12 +1,10 @@
-// RGB and Alpha components of ETC2 RGBA are computed separately.
+// RGB and Alpha components of ETC2 RGBA/DXT5 are computed separately.
 // This compute shader merely stitches them together to form the final result
-// It's also used by RG11 driver to stitch two R11 into one RG11
+// It's also used by RG11/BC4 driver to stitch two R11/BC4 into one RG11/BC5
 
 #[compute]
 #version 450
 
-#include "CrossPlatformSettings_piece_all.glsl"
-
 layout(local_size_x = 8, //
 		local_size_y = 8, //
 		local_size_z = 1) in;
@@ -16,8 +14,8 @@ layout(binding = 1) uniform usampler2D srcAlpha;
 layout(binding = 2, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
 
 void main() {
-	uint2 rgbBlock = OGRE_Load2D(srcRGB, int2(gl_GlobalInvocationID.xy), 0).xy;
-	uint2 alphaBlock = OGRE_Load2D(srcAlpha, int2(gl_GlobalInvocationID.xy), 0).xy;
+	uvec2 rgbBlock = texelFetch(srcRGB, ivec2(gl_GlobalInvocationID.xy), 0).xy;
+	uvec2 alphaBlock = texelFetch(srcAlpha, ivec2(gl_GlobalInvocationID.xy), 0).xy;
 
-	imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), uint4(rgbBlock.xy, alphaBlock.xy));
+	imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), uvec4(rgbBlock.xy, alphaBlock.xy));
 }

+ 58 - 58
modules/betsy/bc1.glsl

@@ -6,7 +6,7 @@ dithered = "#define BC1_DITHER";
 #[compute]
 #version 450
 
-#include "CrossPlatformSettings_piece_all.glsl"
+#VERSION_DEFINES
 
 #define FLT_MAX 340282346638528859811704183484516925440.0f
 
@@ -14,8 +14,8 @@ layout(binding = 0) uniform sampler2D srcTex;
 layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
 
 layout(std430, binding = 2) readonly restrict buffer globalBuffer {
-	float2 c_oMatch5[256];
-	float2 c_oMatch6[256];
+	vec2 c_oMatch5[256];
+	vec2 c_oMatch6[256];
 };
 
 layout(push_constant, std430) uniform Params {
@@ -28,14 +28,14 @@ layout(local_size_x = 8, //
 		local_size_y = 8, //
 		local_size_z = 1) in;
 
-float3 rgb565to888(float rgb565) {
-	float3 retVal;
+vec3 rgb565to888(float rgb565) {
+	vec3 retVal;
 	retVal.x = floor(rgb565 / 2048.0f);
 	retVal.y = floor(mod(rgb565, 2048.0f) / 32.0f);
 	retVal.z = floor(mod(rgb565, 32.0f));
 
 	// This is the correct 565 to 888 conversion:
-	//		rgb = floor( rgb * ( 255.0f / float3( 31.0f, 63.0f, 31.0f ) ) + 0.5f )
+	//		rgb = floor( rgb * ( 255.0f / vec3( 31.0f, 63.0f, 31.0f ) ) + 0.5f )
 	//
 	// However stb_dxt follows a different one:
 	//		rb = floor( rb * ( 256 / 32 + 8 / 32 ) );
@@ -52,10 +52,10 @@ float3 rgb565to888(float rgb565) {
 	// Perhaps when we make 888 -> 565 -> 888 it doesn't matter
 	// because they end up mapping to the original number
 
-	return floor(retVal * float3(8.25f, 4.0625f, 8.25f));
+	return floor(retVal * vec3(8.25f, 4.0625f, 8.25f));
 }
 
-float rgb888to565(float3 rgbValue) {
+float rgb888to565(vec3 rgbValue) {
 	rgbValue.rb = floor(rgbValue.rb * 31.0f / 255.0f + 0.5f);
 	rgbValue.g = floor(rgbValue.g * 63.0f / 255.0f + 0.5f);
 
@@ -63,7 +63,7 @@ float rgb888to565(float3 rgbValue) {
 }
 
 // linear interpolation at 1/3 point between a and b, using desired rounding type
-float3 lerp13(float3 a, float3 b) {
+vec3 lerp13(vec3 a, vec3 b) {
 #ifdef STB_DXT_USE_ROUNDING_BIAS
 	// with rounding bias
 	return a + floor((b - a) * (1.0f / 3.0f) + 0.5f);
@@ -74,7 +74,7 @@ float3 lerp13(float3 a, float3 b) {
 }
 
 /// Unpacks a block of 4 colors from two 16-bit endpoints
-void EvalColors(out float3 colors[4], float c0, float c1) {
+void EvalColors(out vec3 colors[4], float c0, float c1) {
 	colors[0] = rgb565to888(c0);
 	colors[1] = rgb565to888(c1);
 	colors[2] = lerp13(colors[0], colors[1]);
@@ -89,13 +89,13 @@ void EvalColors(out float3 colors[4], float c0, float c1) {
 */
 void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16, out float outMaxEndp16) {
 	// determine color distribution
-	float3 avgColor;
-	float3 minColor;
-	float3 maxColor;
+	vec3 avgColor;
+	vec3 minColor;
+	vec3 maxColor;
 
 	avgColor = minColor = maxColor = unpackUnorm4x8(srcPixelsBlock[0]).xyz;
 	for (int i = 1; i < 16; ++i) {
-		const float3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz;
+		const vec3 currColorUnorm = unpackUnorm4x8(srcPixelsBlock[i]).xyz;
 		avgColor += currColorUnorm;
 		minColor = min(minColor, currColorUnorm);
 		maxColor = max(maxColor, currColorUnorm);
@@ -112,8 +112,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
 	}
 
 	for (int i = 0; i < 16; ++i) {
-		const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
-		float3 rgbDiff = currColor - avgColor;
+		const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
+		vec3 rgbDiff = currColor - avgColor;
 
 		cov[0] += rgbDiff.r * rgbDiff.r;
 		cov[1] += rgbDiff.r * rgbDiff.g;
@@ -128,7 +128,7 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
 		cov[i] /= 255.0f;
 	}
 
-	float3 vF = maxColor - minColor;
+	vec3 vF = maxColor - minColor;
 
 	const int nIterPower = 4;
 	for (int iter = 0; iter < nIterPower; ++iter) {
@@ -141,8 +141,8 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
 		vF.b = b;
 	}
 
-	float magn = max3(abs(vF.r), abs(vF.g), abs(vF.b));
-	float3 v;
+	float magn = max(abs(vF.r), max(abs(vF.g), abs(vF.b)));
+	vec3 v;
 
 	if (magn < 4.0f) { // too small, default to luminance
 		v.r = 299.0f; // JPEG YCbCr luma coefs, scaled by 1000.
@@ -153,11 +153,11 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
 	}
 
 	// Pick colors at extreme points
-	float3 minEndpoint, maxEndpoint;
+	vec3 minEndpoint, maxEndpoint;
 	float minDot = FLT_MAX;
 	float maxDot = -FLT_MAX;
 	for (int i = 0; i < 16; ++i) {
-		const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
+		const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
 		const float dotValue = dot(currColor, v);
 
 		if (dotValue < minDot) {
@@ -176,9 +176,9 @@ void OptimizeColorsBlock(const uint srcPixelsBlock[16], out float outMinEndp16,
 }
 
 // The color matching function
-uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
+uint MatchColorsBlock(const uint srcPixelsBlock[16], vec3 color[4]) {
 	uint mask = 0u;
-	float3 dir = color[0] - color[1];
+	vec3 dir = color[0] - color[1];
 	float stops[4];
 
 	for (int i = 0; i < 4; ++i) {
@@ -200,7 +200,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
 #ifndef BC1_DITHER
 	// the version without dithering is straightforward
 	for (uint i = 16u; i-- > 0u;) {
-		const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
+		const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
 
 		const float dotValue = dot(currColor, dir);
 		mask <<= 2u;
@@ -213,8 +213,8 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
 	}
 #else
 	// with floyd-steinberg dithering
-	float4 ep1 = float4(0, 0, 0, 0);
-	float4 ep2 = float4(0, 0, 0, 0);
+	vec4 ep1 = vec4(0, 0, 0, 0);
+	vec4 ep2 = vec4(0, 0, 0, 0);
 
 	c0Point *= 16.0f;
 	halfPoint *= 16.0f;
@@ -224,7 +224,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
 		float ditherDot;
 		uint lmask, step;
 
-		float3 currColor;
+		vec3 currColor;
 		float dotValue;
 
 		currColor = unpackUnorm4x8(srcPixelsBlock[y * 4 + 0]).xyz * 255.0f;
@@ -277,7 +277,7 @@ uint MatchColorsBlock(const uint srcPixelsBlock[16], float3 color[4]) {
 
 		mask |= lmask << (y * 8u);
 		{
-			float4 tmp = ep1;
+			vec4 tmp = ep1;
 			ep1 = ep2;
 			ep2 = tmp;
 		} // swap
@@ -300,7 +300,7 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
 	{
 		// yes, linear system would be singular; solve using optimal
 		// single-color match on average color
-		float3 rgbVal = float3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f);
+		vec3 rgbVal = vec3(8.0f / 255.0f, 8.0f / 255.0f, 8.0f / 255.0f);
 		for (int i = 0; i < 16; ++i) {
 			rgbVal += unpackUnorm4x8(srcPixelsBlock[i]).xyz;
 		}
@@ -322,10 +322,10 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
 
 		float akku = 0.0f;
 		uint cm = mask;
-		float3 at1 = float3(0, 0, 0);
-		float3 at2 = float3(0, 0, 0);
+		vec3 at1 = vec3(0, 0, 0);
+		vec3 at2 = vec3(0, 0, 0);
 		for (int i = 0; i < 16; ++i, cm >>= 2u) {
-			const float3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
+			const vec3 currColor = unpackUnorm4x8(srcPixelsBlock[i]).xyz * 255.0f;
 
 			const uint step = cm & 3u;
 			const float w1 = w1Tab[step];
@@ -341,17 +341,17 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
 		const float yy = floor(mod(akku, 65535.0f) / 256.0f);
 		const float xy = mod(akku, 256.0f);
 
-		float2 f_rb_g;
+		vec2 f_rb_g;
 		f_rb_g.x = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
 		f_rb_g.y = f_rb_g.x * 63.0f / 31.0f;
 
 		// solve.
-		const float3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f),
-				float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31));
+		const vec3 newMaxVal = clamp(floor((at1 * yy - at2 * xy) * f_rb_g.xyx + 0.5f),
+				vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31));
 		newMax16 = newMaxVal.x * 2048.0f + newMaxVal.y * 32.0f + newMaxVal.z;
 
-		const float3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f),
-				float3(0.0f, 0.0f, 0.0f), float3(31, 63, 31));
+		const vec3 newMinVal = clamp(floor((at2 * xx - at1 * xy) * f_rb_g.xyx + 0.5f),
+				vec3(0.0f, 0.0f, 0.0f), vec3(31, 63, 31));
 		newMin16 = newMinVal.x * 2048.0f + newMinVal.y * 32.0f + newMinVal.z;
 	}
 
@@ -364,48 +364,48 @@ bool RefineBlock(const uint srcPixelsBlock[16], uint mask, inout float inOutMinE
 #ifdef BC1_DITHER
 /// Quantizes 'srcValue' which is originally in 888 (full range),
 /// converting it to 565 and then back to 888 (quantized)
-float3 quant(float3 srcValue) {
+vec3 quant(vec3 srcValue) {
 	srcValue = clamp(srcValue, 0.0f, 255.0f);
 	// Convert 888 -> 565
-	srcValue = floor(srcValue * float3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f);
+	srcValue = floor(srcValue * vec3(31.0f / 255.0f, 63.0f / 255.0f, 31.0f / 255.0f) + 0.5f);
 	// Convert 565 -> 888 back
-	srcValue = floor(srcValue * float3(8.25f, 4.0625f, 8.25f));
+	srcValue = floor(srcValue * vec3(8.25f, 4.0625f, 8.25f));
 
 	return srcValue;
 }
 
 void DitherBlock(const uint srcPixBlck[16], out uint dthPixBlck[16]) {
-	float3 ep1[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) };
-	float3 ep2[4] = { float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0), float3(0, 0, 0) };
+	vec3 ep1[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) };
+	vec3 ep2[4] = { vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0), vec3(0, 0, 0) };
 
 	for (uint y = 0u; y < 16u; y += 4u) {
-		float3 srcPixel, dithPixel;
+		vec3 srcPixel, dithPixel;
 
 		srcPixel = unpackUnorm4x8(srcPixBlck[y + 0u]).xyz * 255.0f;
 		dithPixel = quant(srcPixel + trunc((3 * ep2[1] + 5 * ep2[0]) * (1.0f / 16.0f)));
 		ep1[0] = srcPixel - dithPixel;
-		dthPixBlck[y + 0u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
+		dthPixBlck[y + 0u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
 
 		srcPixel = unpackUnorm4x8(srcPixBlck[y + 1u]).xyz * 255.0f;
 		dithPixel = quant(
 				srcPixel + trunc((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) * (1.0f / 16.0f)));
 		ep1[1] = srcPixel - dithPixel;
-		dthPixBlck[y + 1u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
+		dthPixBlck[y + 1u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
 
 		srcPixel = unpackUnorm4x8(srcPixBlck[y + 2u]).xyz * 255.0f;
 		dithPixel = quant(
 				srcPixel + trunc((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) * (1.0f / 16.0f)));
 		ep1[2] = srcPixel - dithPixel;
-		dthPixBlck[y + 2u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
+		dthPixBlck[y + 2u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
 
 		srcPixel = unpackUnorm4x8(srcPixBlck[y + 3u]).xyz * 255.0f;
 		dithPixel = quant(srcPixel + trunc((7 * ep1[2] + 5 * ep2[3] + ep2[2]) * (1.0f / 16.0f)));
 		ep1[3] = srcPixel - dithPixel;
-		dthPixBlck[y + 3u] = packUnorm4x8(float4(dithPixel * (1.0f / 255.0f), 1.0f));
+		dthPixBlck[y + 3u] = packUnorm4x8(vec4(dithPixel * (1.0f / 255.0f), 1.0f));
 
 		// swap( ep1, ep2 )
 		for (uint i = 0u; i < 4u; ++i) {
-			float3 tmp = ep1[i];
+			vec3 tmp = ep1[i];
 			ep1[i] = ep2[i];
 			ep2[i] = tmp;
 		}
@@ -419,11 +419,11 @@ void main() {
 	bool bAllColorsEqual = true;
 
 	// Load the whole 4x4 block
-	const uint2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u;
+	const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.xy << 2u;
 	for (uint i = 0u; i < 16u; ++i) {
-		const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i & 0x03u, i >> 2u);
-		const float3 srcPixels0 = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyz;
-		srcPixelsBlock[i] = packUnorm4x8(float4(srcPixels0, 1.0f));
+		const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i & 0x03u, i >> 2u);
+		const vec3 srcPixels0 = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyz;
+		srcPixelsBlock[i] = packUnorm4x8(vec4(srcPixels0, 1.0f));
 		bAllColorsEqual = bAllColorsEqual && srcPixelsBlock[0] == srcPixelsBlock[i];
 	}
 
@@ -431,7 +431,7 @@ void main() {
 	uint mask = 0u;
 
 	if (bAllColorsEqual) {
-		const uint3 rgbVal = uint3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f);
+		const uvec3 rgbVal = uvec3(unpackUnorm4x8(srcPixelsBlock[0]).xyz * 255.0f);
 		mask = 0xAAAAAAAAu;
 		maxEndp16 =
 				c_oMatch5[rgbVal.r][0] * 2048.0f + c_oMatch6[rgbVal.g][0] * 32.0f + c_oMatch5[rgbVal.b][0];
@@ -449,7 +449,7 @@ void main() {
 		// second step: pca+map along principal axis
 		OptimizeColorsBlock(ditherPixelsBlock, minEndp16, maxEndp16);
 		if (minEndp16 != maxEndp16) {
-			float3 colors[4];
+			vec3 colors[4];
 			EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
 			mask = MatchColorsBlock(srcPixelsBlock, colors);
 		}
@@ -461,7 +461,7 @@ void main() {
 
 			if (RefineBlock(ditherPixelsBlock, mask, minEndp16, maxEndp16)) {
 				if (minEndp16 != maxEndp16) {
-					float3 colors[4];
+					vec3 colors[4];
 					EvalColors(colors, maxEndp16, minEndp16); // Note min/max are inverted
 					mask = MatchColorsBlock(srcPixelsBlock, colors);
 				} else {
@@ -482,10 +482,10 @@ void main() {
 		mask ^= 0x55555555u;
 	}
 
-	uint2 outputBytes;
+	uvec2 outputBytes;
 	outputBytes.x = uint(maxEndp16) | (uint(minEndp16) << 16u);
 	outputBytes.y = mask;
 
-	uint2 dstUV = gl_GlobalInvocationID.xy;
-	imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u));
+	uvec2 dstUV = gl_GlobalInvocationID.xy;
+	imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u));
 }

+ 15 - 17
modules/betsy/bc4.glsl

@@ -6,12 +6,10 @@ signed = "#define SNORM";
 #[compute]
 #version 450
 
-#include "CrossPlatformSettings_piece_all.glsl"
-
 #VERSION_DEFINES
 
-shared float2 g_minMaxValues[4u * 4u * 4u];
-shared uint2 g_mask[4u * 4u];
+shared vec2 g_minMaxValues[4u * 4u * 4u];
+shared uvec2 g_mask[4u * 4u];
 
 layout(binding = 0) uniform sampler2D srcTex;
 layout(binding = 1, rg32ui) uniform restrict writeonly uimage2D dstTexture;
@@ -40,30 +38,30 @@ layout(local_size_x = 4, //
 ///  - Long threads (e.g. 1 thread per block) misses parallelism opportunities
 void main() {
 	float minVal, maxVal;
-	float4 srcPixel;
+	vec4 srcPixel;
 
 	const uint blockThreadId = gl_LocalInvocationID.x;
 
-	const uint2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u;
+	const uvec2 pixelsToLoadBase = gl_GlobalInvocationID.yz << 2u;
 
 	for (uint i = 0u; i < 4u; ++i) {
-		const uint2 pixelsToLoad = pixelsToLoadBase + uint2(i, blockThreadId);
+		const uvec2 pixelsToLoad = pixelsToLoadBase + uvec2(i, blockThreadId);
 
-		const float4 value = OGRE_Load2D(srcTex, int2(pixelsToLoad), 0).xyzw;
+		const vec4 value = texelFetch(srcTex, ivec2(pixelsToLoad), 0).xyzw;
 		srcPixel[i] = params.p_channelIdx == 0 ? value.x : (params.p_channelIdx == 1 ? value.y : value.w);
 		srcPixel[i] *= 255.0f;
 	}
 
-	minVal = min3(srcPixel.x, srcPixel.y, srcPixel.z);
-	maxVal = max3(srcPixel.x, srcPixel.y, srcPixel.z);
+	minVal = min(srcPixel.x, min(srcPixel.y, srcPixel.z));
+	maxVal = max(srcPixel.x, max(srcPixel.y, srcPixel.z));
 	minVal = min(minVal, srcPixel.w);
 	maxVal = max(maxVal, srcPixel.w);
 
 	const uint minMaxIdxBase = (gl_LocalInvocationID.z << 4u) + (gl_LocalInvocationID.y << 2u);
 	const uint maskIdxBase = (gl_LocalInvocationID.z << 2u) + gl_LocalInvocationID.y;
 
-	g_minMaxValues[minMaxIdxBase + blockThreadId] = float2(minVal, maxVal);
-	g_mask[maskIdxBase] = uint2(0u, 0u);
+	g_minMaxValues[minMaxIdxBase + blockThreadId] = vec2(minVal, maxVal);
+	g_mask[maskIdxBase] = uvec2(0u, 0u);
 
 	memoryBarrierShared();
 	barrier();
@@ -133,21 +131,21 @@ void main() {
 
 	if (blockThreadId == 0u) {
 		// Save data
-		uint2 outputBytes;
+		uvec2 outputBytes;
 
 #ifdef SNORM
 		outputBytes.x =
-				packSnorm4x8(float4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f,
+				packSnorm4x8(vec4(maxVal * (1.0f / 255.0f) * 2.0f - 1.0f,
 						minVal * (1.0f / 255.0f) * 2.0f - 1.0f, 0.0f, 0.0f));
 #else
 		outputBytes.x = packUnorm4x8(
-				float4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f));
+				vec4(maxVal * (1.0f / 255.0f), minVal * (1.0f / 255.0f), 0.0f, 0.0f));
 #endif
 
 		outputBytes.x |= g_mask[maskIdxBase].x;
 		outputBytes.y = g_mask[maskIdxBase].y;
 
-		uint2 dstUV = gl_GlobalInvocationID.yz;
-		imageStore(dstTexture, int2(dstUV), uint4(outputBytes.xy, 0u, 0u));
+		uvec2 dstUV = gl_GlobalInvocationID.yz;
+		imageStore(dstTexture, ivec2(dstUV), uvec4(outputBytes.xy, 0u, 0u));
 	}
 }

+ 145 - 146
modules/betsy/bc6h.glsl

@@ -6,24 +6,22 @@ unsigned = "#define QUALITY"; // The "Quality" preset causes artifacting on sign
 #[compute]
 #version 450
 
-#include "CrossPlatformSettings_piece_all.glsl"
-
 #VERSION_DEFINES
 
-float3 f32tof16(float3 value) {
-	return float3(packHalf2x16(float2(value.x, 0.0)),
-			packHalf2x16(float2(value.y, 0.0)),
-			packHalf2x16(float2(value.z, 0.0)));
+vec3 f32tof16(vec3 value) {
+	return vec3(packHalf2x16(vec2(value.x, 0.0)),
+			packHalf2x16(vec2(value.y, 0.0)),
+			packHalf2x16(vec2(value.z, 0.0)));
 }
 
-float3 f16tof32(uint3 value) {
-	return float3(unpackHalf2x16(value.x).x,
+vec3 f16tof32(uvec3 value) {
+	return vec3(unpackHalf2x16(value.x).x,
 			unpackHalf2x16(value.y).x,
 			unpackHalf2x16(value.z).x);
 }
 
 float f32tof16(float value) {
-	return packHalf2x16(float2(value.x, 0.0));
+	return packHalf2x16(vec2(value.x, 0.0));
 }
 
 float f16tof32(uint value) {
@@ -34,7 +32,7 @@ layout(binding = 0) uniform sampler2D srcTexture;
 layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture;
 
 layout(push_constant, std430) uniform Params {
-	float2 p_textureSizeRcp;
+	vec2 p_textureSizeRcp;
 	uint padding0;
 	uint padding1;
 }
@@ -69,7 +67,7 @@ float CrossCalcMSLE(float a, float b) {
 	return result;
 }
 
-float CalcMSLE(float3 a, float3 b) {
+float CalcMSLE(vec3 a, vec3 b) {
 	float result = 0.0f;
 	if (isNegative(a.x) != isNegative(b.x)) {
 		result += CrossCalcMSLE(a.x, b.x);
@@ -91,32 +89,32 @@ float CalcMSLE(float3 a, float3 b) {
 }
 
 // Adapt the log function to make sense when a < 0
-float3 customLog2(float3 a) {
-	return float3(
+vec3 customLog2(vec3 a) {
+	return vec3(
 			a.x >= 0 ? log2(a.x + 1.0f) : -log2(-a.x + 1.0f),
 			a.y >= 0 ? log2(a.y + 1.0f) : -log2(-a.y + 1.0f),
 			a.z >= 0 ? log2(a.z + 1.0f) : -log2(-a.z + 1.0f));
 }
 
 // Inverse of customLog2()
-float3 customExp2(float3 a) {
-	return float3(
+vec3 customExp2(vec3 a) {
+	return vec3(
 			a.x >= 0 ? exp2(a.x) - 1.0f : -(exp2(-a.x) - 1.0f),
 			a.y >= 0 ? exp2(a.y) - 1.0f : -(exp2(-a.y) - 1.0f),
 			a.z >= 0 ? exp2(a.z) - 1.0f : -(exp2(-a.z) - 1.0f));
 }
 #else
-float CalcMSLE(float3 a, float3 b) {
-	float3 err = log2((b + 1.0f) / (a + 1.0f));
+float CalcMSLE(vec3 a, vec3 b) {
+	vec3 err = log2((b + 1.0f) / (a + 1.0f));
 	err = err * err;
 	return err.x + err.y + err.z;
 }
 
-float3 customLog2(float3 a) {
+vec3 customLog2(vec3 a) {
 	return log2(a + 1.0f);
 }
 
-float3 customExp2(float3 a) {
+vec3 customExp2(vec3 a) {
 	return exp2(a) - 1.0f;
 }
 #endif
@@ -157,98 +155,98 @@ uint Pattern(uint p, uint i) {
 
 #ifndef SIGNED
 //UF
-float3 Quantize7(float3 x) {
+vec3 Quantize7(vec3 x) {
 	return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f);
 }
 
-float3 Quantize9(float3 x) {
+vec3 Quantize9(vec3 x) {
 	return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f);
 }
 
-float3 Quantize10(float3 x) {
+vec3 Quantize10(vec3 x) {
 	return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f);
 }
 
-float3 Unquantize7(float3 x) {
+vec3 Unquantize7(vec3 x) {
 	return (x * 65536.0f + 0x8000) / 128.0f;
 }
 
-float3 Unquantize9(float3 x) {
+vec3 Unquantize9(vec3 x) {
 	return (x * 65536.0f + 0x8000) / 512.0f;
 }
 
-float3 Unquantize10(float3 x) {
+vec3 Unquantize10(vec3 x) {
 	return (x * 65536.0f + 0x8000) / 1024.0f;
 }
 
-float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
-	float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f);
-	return f16tof32(uint3(comp));
+vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) {
+	vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f);
+	return f16tof32(uvec3(comp));
 }
 #else
 //SF
 
-float3 cmpSign(float3 value) {
-	float3 signVal;
+vec3 cmpSign(vec3 value) {
+	vec3 signVal;
 	signVal.x = value.x >= 0.0f ? 1.0f : -1.0f;
 	signVal.y = value.y >= 0.0f ? 1.0f : -1.0f;
 	signVal.z = value.z >= 0.0f ? 1.0f : -1.0f;
 	return signVal;
 }
 
-float3 Quantize7(float3 x) {
-	float3 signVal = cmpSign(x);
+vec3 Quantize7(vec3 x) {
+	vec3 signVal = cmpSign(x);
 	return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f);
 }
 
-float3 Quantize9(float3 x) {
-	float3 signVal = cmpSign(x);
+vec3 Quantize9(vec3 x) {
+	vec3 signVal = cmpSign(x);
 	return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f);
 }
 
-float3 Quantize10(float3 x) {
-	float3 signVal = cmpSign(x);
+vec3 Quantize10(vec3 x) {
+	vec3 signVal = cmpSign(x);
 	return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f);
 }
 
-float3 Unquantize7(float3 x) {
-	float3 signVal = sign(x);
+vec3 Unquantize7(vec3 x) {
+	vec3 signVal = sign(x);
 	x = abs(x);
-	float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f;
+	vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f;
 	finalVal.x = x.x >= 64.0f ? 32767.0 : finalVal.x;
 	finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y;
 	finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z;
 	return finalVal;
 }
 
-float3 Unquantize9(float3 x) {
-	float3 signVal = sign(x);
+vec3 Unquantize9(vec3 x) {
+	vec3 signVal = sign(x);
 	x = abs(x);
-	float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f;
+	vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f;
 	finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x;
 	finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y;
 	finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z;
 	return finalVal;
 }
 
-float3 Unquantize10(float3 x) {
-	float3 signVal = sign(x);
+vec3 Unquantize10(vec3 x) {
+	vec3 signVal = sign(x);
 	x = abs(x);
-	float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f;
+	vec3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f;
 	finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x;
 	finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y;
 	finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z;
 	return finalVal;
 }
 
-float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) {
-	float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f);
-	return f16tof32(uint3(comp));
+vec3 FinishUnquantize(vec3 endpoint0Unq, vec3 endpoint1Unq, float weight) {
+	vec3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f);
+	return f16tof32(uvec3(comp));
 }
 #endif
 
-void Swap(inout float3 a, inout float3 b) {
-	float3 tmp = a;
+void Swap(inout vec3 a, inout vec3 b) {
+	vec3 tmp = a;
 	a = b;
 	b = tmp;
 }
@@ -270,8 +268,8 @@ uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) {
 }
 
 // This adds a bitflag to quantized values that signifies whether they are negative.
-void SignExtend(inout float3 v1, uint mask, uint signFlag) {
-	int3 v = int3(v1);
+void SignExtend(inout vec3 v1, uint mask, uint signFlag) {
+	ivec3 v = ivec3(v1);
 	v.x = (v.x & int(mask)) | (v.x < 0 ? int(signFlag) : 0);
 	v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0);
 	v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0);
@@ -279,38 +277,39 @@ void SignExtend(inout float3 v1, uint mask, uint signFlag) {
 }
 
 // Encodes a block with mode 11 (2x 10-bit endpoints).
-void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
+void EncodeP1(inout uvec4 block, inout float blockMSLE, vec3 texels[16]) {
 	// compute endpoints (min/max RGB bbox)
-	float3 blockMin = texels[0];
-	float3 blockMax = texels[0];
+	vec3 blockMin = texels[0];
+	vec3 blockMax = texels[0];
 	for (uint i = 1u; i < 16u; ++i) {
 		blockMin = min(blockMin, texels[i]);
 		blockMax = max(blockMax, texels[i]);
 	}
 
 	// refine endpoints in log2 RGB space
-	float3 refinedBlockMin = blockMax;
-	float3 refinedBlockMax = blockMin;
+	vec3 refinedBlockMin = blockMax;
+	vec3 refinedBlockMax = blockMin;
 	for (uint i = 0u; i < 16u; ++i) {
 		refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
 		refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
 	}
 
-	float3 logBlockMax = customLog2(blockMax);
-	float3 logBlockMin = customLog2(blockMin);
-	float3 logRefinedBlockMax = customLog2(refinedBlockMax);
-	float3 logRefinedBlockMin = customLog2(refinedBlockMin);
-	float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
+	vec3 logBlockMax = customLog2(blockMax);
+	vec3 logBlockMin = customLog2(blockMin);
+	vec3 logRefinedBlockMax = customLog2(refinedBlockMax);
+	vec3 logRefinedBlockMin = customLog2(refinedBlockMin);
+	vec3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);
+
 	logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
 	logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);
 	blockMin = customExp2(logBlockMin);
 	blockMax = customExp2(logBlockMax);
 
-	float3 blockDir = blockMax - blockMin;
+	vec3 blockDir = blockMax - blockMin;
 	blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);
 
-	float3 endpoint0 = Quantize10(blockMin);
-	float3 endpoint1 = Quantize10(blockMax);
+	vec3 endpoint0 = Quantize10(blockMin);
+	vec3 endpoint1 = Quantize10(blockMax);
 	float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
 	float endPoint1Pos = f32tof16(dot(blockMax, blockDir));
 
@@ -336,12 +335,12 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
 	}
 
 	// compute compression error (MSLE)
-	float3 endpoint0Unq = Unquantize10(endpoint0);
-	float3 endpoint1Unq = Unquantize10(endpoint1);
+	vec3 endpoint0Unq = Unquantize10(endpoint0);
+	vec3 endpoint1Unq = Unquantize10(endpoint1);
 	float msle = 0.0f;
 	for (uint i = 0u; i < 16u; ++i) {
 		float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
-		float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);
+		vec3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);
 
 		msle += CalcMSLE(texels[i], texelUnc);
 	}
@@ -384,19 +383,19 @@ void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) {
 	block.w |= indices[15] << 28u;
 }
 
-float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) {
-	float3 w = Point - PointOnLine;
-	float3 x = w - dot(w, LineDirection) * LineDirection;
+float DistToLineSq(vec3 PointOnLine, vec3 LineDirection, vec3 Point) {
+	vec3 w = Point - PointOnLine;
+	vec3 x = w - dot(w, LineDirection) * LineDirection;
 
 	return dot(x, x);
 }
 
 // Gets the deviation from the source data of a particular pattern (smaller is better).
-float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
-	float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
-	float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
-	float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
-	float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
+float EvaluateP2Pattern(uint pattern, vec3 texels[16]) {
+	vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
+	vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
+	vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
+	vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
 
 	for (uint i = 0; i < 16; ++i) {
 		uint paletteID = Pattern(pattern, i);
@@ -409,8 +408,8 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
 		}
 	}
 
-	float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin);
-	float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin);
+	vec3 p0BlockDir = normalize(p0BlockMax - p0BlockMin);
+	vec3 p1BlockDir = normalize(p1BlockMax - p1BlockMin);
 
 	float sqDistanceFromLine = 0.0f;
 
@@ -427,11 +426,11 @@ float EvaluateP2Pattern(uint pattern, float3 texels[16]) {
 }
 
 // Encodes a block with either mode 2 (7-bit base, 3x 6-bit delta), or mode 6 (9-bit base, 3x 5-bit delta). Both use pattern encoding.
-void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) {
-	float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
-	float3 p0BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
-	float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
-	float3 p1BlockMax = float3(HALF_MIN, HALF_MIN, HALF_MIN);
+void EncodeP2Pattern(inout uvec4 block, inout float blockMSLE, uint pattern, vec3 texels[16]) {
+	vec3 p0BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
+	vec3 p0BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
+	vec3 p1BlockMin = vec3(HALF_MAX, HALF_MAX, HALF_MAX);
+	vec3 p1BlockMax = vec3(HALF_MIN, HALF_MIN, HALF_MIN);
 
 	for (uint i = 0u; i < 16u; ++i) {
 		uint paletteID = Pattern(pattern, i);
@@ -444,8 +443,8 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
 		}
 	}
 
-	float3 p0BlockDir = p0BlockMax - p0BlockMin;
-	float3 p1BlockDir = p1BlockMax - p1BlockMin;
+	vec3 p0BlockDir = p0BlockMax - p0BlockMin;
+	vec3 p1BlockDir = p1BlockMax - p1BlockMin;
 	p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z);
 	p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z);
 
@@ -479,15 +478,15 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
 		indices[i] = paletteID == 0u ? p0Index : p1Index;
 	}
 
-	float3 endpoint760 = floor(Quantize7(p0BlockMin));
-	float3 endpoint761 = floor(Quantize7(p0BlockMax));
-	float3 endpoint762 = floor(Quantize7(p1BlockMin));
-	float3 endpoint763 = floor(Quantize7(p1BlockMax));
+	vec3 endpoint760 = floor(Quantize7(p0BlockMin));
+	vec3 endpoint761 = floor(Quantize7(p0BlockMax));
+	vec3 endpoint762 = floor(Quantize7(p1BlockMin));
+	vec3 endpoint763 = floor(Quantize7(p1BlockMax));
 
-	float3 endpoint950 = floor(Quantize9(p0BlockMin));
-	float3 endpoint951 = floor(Quantize9(p0BlockMax));
-	float3 endpoint952 = floor(Quantize9(p1BlockMin));
-	float3 endpoint953 = floor(Quantize9(p1BlockMax));
+	vec3 endpoint950 = floor(Quantize9(p0BlockMin));
+	vec3 endpoint951 = floor(Quantize9(p0BlockMax));
+	vec3 endpoint952 = floor(Quantize9(p1BlockMin));
+	vec3 endpoint953 = floor(Quantize9(p1BlockMax));
 
 	endpoint761 = endpoint761 - endpoint760;
 	endpoint762 = endpoint762 - endpoint760;
@@ -514,28 +513,28 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
 	endpoint950 = clamp(endpoint950, -maxVal9, maxVal9);
 #endif
 
-	float3 endpoint760Unq = Unquantize7(endpoint760);
-	float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
-	float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
-	float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
-	float3 endpoint950Unq = Unquantize9(endpoint950);
-	float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
-	float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
-	float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);
+	vec3 endpoint760Unq = Unquantize7(endpoint760);
+	vec3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
+	vec3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
+	vec3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
+	vec3 endpoint950Unq = Unquantize9(endpoint950);
+	vec3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
+	vec3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
+	vec3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);
 
 	float msle76 = 0.0f;
 	float msle95 = 0.0f;
 	for (uint i = 0u; i < 16u; ++i) {
 		uint paletteID = Pattern(pattern, i);
 
-		float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
-		float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
-		float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
-		float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;
+		vec3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq;
+		vec3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq;
+		vec3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq;
+		vec3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq;
 
 		float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
-		float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
-		float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);
+		vec3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
+		vec3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);
 
 		msle76 += CalcMSLE(texels[i], texelUnc76);
 		msle95 += CalcMSLE(texels[i], texelUnc95);
@@ -558,7 +557,7 @@ void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, flo
 	float p2MSLE = min(msle76, msle95);
 	if (p2MSLE < blockMSLE) {
 		blockMSLE = p2MSLE;
-		block = uint4(0u, 0u, 0u, 0u);
+		block = uvec4(0u, 0u, 0u, 0u);
 
 		if (p2MSLE == msle76) {
 			// 7.6
@@ -681,43 +680,43 @@ void main() {
 	// 4 5 6 7
 	// 8 9 10 11
 	// 12 13 14 15
-	float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
-	float2 block0UV = uv;
-	float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f);
-	float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y);
-	float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y);
-	float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV);
-	float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV);
-	float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV);
-	float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV);
-	float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV);
-	float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV);
-	float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV);
-	float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV);
-	float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV);
-	float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV);
-	float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV);
-	float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV);
-
-	float3 texels[16];
-	texels[0] = float3(block0X.w, block0Y.w, block0Z.w);
-	texels[1] = float3(block0X.z, block0Y.z, block0Z.z);
-	texels[2] = float3(block1X.w, block1Y.w, block1Z.w);
-	texels[3] = float3(block1X.z, block1Y.z, block1Z.z);
-	texels[4] = float3(block0X.x, block0Y.x, block0Z.x);
-	texels[5] = float3(block0X.y, block0Y.y, block0Z.y);
-	texels[6] = float3(block1X.x, block1Y.x, block1Z.x);
-	texels[7] = float3(block1X.y, block1Y.y, block1Z.y);
-	texels[8] = float3(block2X.w, block2Y.w, block2Z.w);
-	texels[9] = float3(block2X.z, block2Y.z, block2Z.z);
-	texels[10] = float3(block3X.w, block3Y.w, block3Z.w);
-	texels[11] = float3(block3X.z, block3Y.z, block3Z.z);
-	texels[12] = float3(block2X.x, block2Y.x, block2Z.x);
-	texels[13] = float3(block2X.y, block2Y.y, block2Z.y);
-	texels[14] = float3(block3X.x, block3Y.x, block3Z.x);
-	texels[15] = float3(block3X.y, block3Y.y, block3Z.y);
-
-	uint4 block = uint4(0u, 0u, 0u, 0u);
+	vec2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp;
+	vec2 block0UV = uv;
+	vec2 block1UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 0.0f);
+	vec2 block2UV = uv + vec2(0.0f, 2.0f * params.p_textureSizeRcp.y);
+	vec2 block3UV = uv + vec2(2.0f * params.p_textureSizeRcp.x, 2.0f * params.p_textureSizeRcp.y);
+	vec4 block0X = textureGather(srcTexture, block0UV, 0);
+	vec4 block1X = textureGather(srcTexture, block1UV, 0);
+	vec4 block2X = textureGather(srcTexture, block2UV, 0);
+	vec4 block3X = textureGather(srcTexture, block3UV, 0);
+	vec4 block0Y = textureGather(srcTexture, block0UV, 1);
+	vec4 block1Y = textureGather(srcTexture, block1UV, 1);
+	vec4 block2Y = textureGather(srcTexture, block2UV, 1);
+	vec4 block3Y = textureGather(srcTexture, block3UV, 1);
+	vec4 block0Z = textureGather(srcTexture, block0UV, 2);
+	vec4 block1Z = textureGather(srcTexture, block1UV, 2);
+	vec4 block2Z = textureGather(srcTexture, block2UV, 2);
+	vec4 block3Z = textureGather(srcTexture, block3UV, 2);
+
+	vec3 texels[16];
+	texels[0] = vec3(block0X.w, block0Y.w, block0Z.w);
+	texels[1] = vec3(block0X.z, block0Y.z, block0Z.z);
+	texels[2] = vec3(block1X.w, block1Y.w, block1Z.w);
+	texels[3] = vec3(block1X.z, block1Y.z, block1Z.z);
+	texels[4] = vec3(block0X.x, block0Y.x, block0Z.x);
+	texels[5] = vec3(block0X.y, block0Y.y, block0Z.y);
+	texels[6] = vec3(block1X.x, block1Y.x, block1Z.x);
+	texels[7] = vec3(block1X.y, block1Y.y, block1Z.y);
+	texels[8] = vec3(block2X.w, block2Y.w, block2Z.w);
+	texels[9] = vec3(block2X.z, block2Y.z, block2Z.z);
+	texels[10] = vec3(block3X.w, block3Y.w, block3Z.w);
+	texels[11] = vec3(block3X.z, block3Y.z, block3Z.z);
+	texels[12] = vec3(block2X.x, block2Y.x, block2Z.x);
+	texels[13] = vec3(block2X.y, block2Y.y, block2Z.y);
+	texels[14] = vec3(block3X.x, block3Y.x, block3Z.x);
+	texels[15] = vec3(block3X.y, block3Y.y, block3Z.y);
+
+	uvec4 block = uvec4(0u, 0u, 0u, 0u);
 	float blockMSLE = 0.0f;
 
 	EncodeP1(block, blockMSLE, texels);
@@ -738,5 +737,5 @@ void main() {
 	EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
 #endif
 
-	imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block);
+	imageStore(dstTexture, ivec2(gl_GlobalInvocationID.xy), block);
 }