Browse Source

Found a bug in clamping of mipLevel in texture_sample_bilinear, but it is much faster to just clamp it as a single scalar in texture_getMipLevelIndex, so MIP_INSIDE is currently mandatory until I find an efficient way of limiting mip levels on Intel processors.

David Piuva 6 months ago
parent
commit
ba162b1f68

+ 10 - 1
Source/DFPSR/api/textureAPI.h

@@ -340,6 +340,13 @@ namespace dsr {
 	    DSR_CHECK_PROPERTY(DsrTrait_Any_U32, M)
 	  )>
 	inline auto texture_sample_bilinear(const TextureRgbaU8 &texture, const F32 &u, const F32 &v, const M &mipLevel) {
+		if (!MIP_INSIDE) {
+			// TODO: Either handle all special cases with mipLevel out of bounds, or make an integer minimum
+			//         function that is emulated with scalar operations when not available in hardware.
+			//       Clamping the MIP level in texture_getMipLevelIndex will probably always be faster anyway,
+			//         because there it can be clamped as a single scalar integer for multiple pixels.
+			throwError(U"MIP_INSIDE is currently mandatory for texture_sample_bilinear because some processors do not have integer minimum functions for limiting mipLevel efficiently!\n");
+		}
 		M scaleU = M(256u << texture.impl_log2width);
 		M scaleV = M(256u << texture.impl_log2height);
 		if (!HIGHEST_RESOLUTION) {
@@ -481,7 +488,9 @@ namespace dsr {
 		if (offset >  4.0f) { result = 2; }
 		if (offset >  8.0f) { result = 3; }
 		if (offset > 16.0f) { result = 4; }
-		// TODO: Should it be possible to configure the number of mip levels?
+		// Clamp the MIP level.
+		int32_t maxMip = texture_getSmallestMipLevel(source);
+		if (result > maxMip) { result = maxMip; }
 		return result;
 	}
 

+ 4 - 4
Source/DFPSR/implementation/render/shader/RgbaMultiply.h

@@ -78,12 +78,12 @@ inline Rgba_F32<U32x4, F32x4> getPixels_2x2(void *data, const F32x4x3 &vertexWei
 		// Optimized for diffuse only
 		F32x4 u1 = shaderMethods::interpolate(assets->texCoords.u1, vertexWeights);
 		F32x4 v1 = shaderMethods::interpolate(assets->texCoords.v1, vertexWeights);
-		return shaderMethods::sample_F32<Interpolation::BL, false, DIFFUSE_SINGLE_LAYER, false, false, false>(assets->diffuseMap, u1, v1);
+		return shaderMethods::sample_F32<Interpolation::BL, false, DIFFUSE_SINGLE_LAYER, false, false>(assets->diffuseMap, u1, v1);
 	} else if (HAS_LIGHT_MAP && !HAS_DIFFUSE_MAP && COLORLESS) {
 		// Optimized for light only
 		F32x4 u2 = shaderMethods::interpolate(assets->texCoords.u2, vertexWeights);
 		F32x4 v2 = shaderMethods::interpolate(assets->texCoords.v2, vertexWeights);
-		return shaderMethods::sample_F32<Interpolation::BL, false, false, false, false, true>(assets->lightMap, u2, v2);
+		return shaderMethods::sample_F32<Interpolation::BL, false, false, false, true>(assets->lightMap, u2, v2);
 	} else {
 		// Interpolate the vertex color
 		Rgba_F32<U32x4, F32x4> color = HAS_VERTEX_FADING ?
@@ -93,13 +93,13 @@ inline Rgba_F32<U32x4, F32x4> getPixels_2x2(void *data, const F32x4x3 &vertexWei
 		if (HAS_DIFFUSE_MAP) {
 			F32x4 u1 = shaderMethods::interpolate(assets->texCoords.u1, vertexWeights);
 			F32x4 v1 = shaderMethods::interpolate(assets->texCoords.v1, vertexWeights);
-			color = color * shaderMethods::sample_F32<Interpolation::BL, false, DIFFUSE_SINGLE_LAYER, false, false, false>(assets->diffuseMap, u1, v1);
+			color = color * shaderMethods::sample_F32<Interpolation::BL, false, DIFFUSE_SINGLE_LAYER, false, false>(assets->diffuseMap, u1, v1);
 		}
 		// Sample lightmap
 		if (HAS_LIGHT_MAP) {
 			F32x4 u2 = shaderMethods::interpolate(assets->texCoords.u2, vertexWeights);
 			F32x4 v2 = shaderMethods::interpolate(assets->texCoords.v2, vertexWeights);
-			color = color * shaderMethods::sample_F32<Interpolation::BL, false, false, false, false, true>(assets->lightMap, u2, v2);
+			color = color * shaderMethods::sample_F32<Interpolation::BL, false, false, false, true>(assets->lightMap, u2, v2);
 		}
 		return color;
 	}

+ 6 - 7
Source/DFPSR/implementation/render/shader/shaderMethods.h

@@ -61,24 +61,24 @@ namespace shaderMethods {
 	  bool SQUARE = false,
 	  bool SINGLE_LAYER = false,
 	  bool XY_INSIDE = false,
-	  bool MIP_INSIDE = false,
 	  bool HIGHEST_RESOLUTION = false
 	>
 	inline U32x4 sample_U32(const TextureRgbaU8 &source, const F32x4 &u, const F32x4 &v) {
+		// Because constant level 0 and the result of texture_getMipLevelIndex will be within bounds, we can assume that the MIP level is inside and set MIP_INSIDE to true.
 		if (INTERPOLATION == Interpolation::NN) {
 			if (HIGHEST_RESOLUTION) {
-				return texture_sample_nearest<SQUARE, SINGLE_LAYER, MIP_INSIDE, HIGHEST_RESOLUTION>(source, u, v, 0u);
+				return texture_sample_nearest<SQUARE, SINGLE_LAYER, true, HIGHEST_RESOLUTION>(source, u, v, 0u);
 			} else {
 				// TODO: Calculate MIP levels using a separate rendering stage with sparse resolution writing results into thread-local memory.
 				uint32_t mipLevel = texture_getMipLevelIndex<F32x4>(source, u, v);
-				return texture_sample_nearest<SQUARE, SINGLE_LAYER, MIP_INSIDE, HIGHEST_RESOLUTION>(source, u, v, mipLevel);
+				return texture_sample_nearest<SQUARE, SINGLE_LAYER, true, HIGHEST_RESOLUTION>(source, u, v, mipLevel);
 			}
 		} else {
 			if (HIGHEST_RESOLUTION) {
-				return texture_sample_bilinear<SQUARE, SINGLE_LAYER, MIP_INSIDE, HIGHEST_RESOLUTION>(source, u, v, 0u);
+				return texture_sample_bilinear<SQUARE, SINGLE_LAYER, true, HIGHEST_RESOLUTION>(source, u, v, 0u);
 			} else {
 				uint32_t mipLevel = texture_getMipLevelIndex<F32x4>(source, u, v);
-				return texture_sample_bilinear<SQUARE, SINGLE_LAYER, MIP_INSIDE, HIGHEST_RESOLUTION>(source, u, v, mipLevel);
+				return texture_sample_bilinear<SQUARE, SINGLE_LAYER, true, HIGHEST_RESOLUTION>(source, u, v, mipLevel);
 			}
 		}
 	}
@@ -87,11 +87,10 @@ namespace shaderMethods {
 	  bool SQUARE = false,
 	  bool SINGLE_LAYER = false,
 	  bool XY_INSIDE = false,
-	  bool MIP_INSIDE = false,
 	  bool HIGHEST_RESOLUTION = false
 	>
 	inline Rgba_F32<U32x4, F32x4> sample_F32(const TextureRgbaU8 &source, const F32x4 &u, const F32x4 &v) {
-		return Rgba_F32<U32x4, F32x4>(sample_U32<INTERPOLATION, SQUARE, SINGLE_LAYER, XY_INSIDE, MIP_INSIDE, HIGHEST_RESOLUTION>(source, u, v));
+		return Rgba_F32<U32x4, F32x4>(sample_U32<INTERPOLATION, SQUARE, SINGLE_LAYER, XY_INSIDE, HIGHEST_RESOLUTION>(source, u, v));
 	}
 }