Browse Source

Fixed the stretched texture bug on ARM NEON.

David Piuva 7 months ago
parent
commit
d5aaf36e57
3 changed files with 59 additions and 9 deletions
  1. 13 6
      Source/DFPSR/api/textureAPI.h
  2. 11 3
      Source/test/tests/SimdTest.cpp
  3. 35 0
      Source/test/tests/TextureTest.cpp

+ 13 - 6
Source/DFPSR/api/textureAPI.h

@@ -219,8 +219,8 @@ namespace dsr {
 	// TODO: Can EXISTS be an argument to disable when non-existing images should be replaced with U(255u) for fast prototyping?
 	// Sample the nearest pixel in a normalized UV scale where one unit equals one lap around the image.
 	// Pre-condition:
-	//   0.0f <= u, 0.0f <= v
-	//   Negative texture coordinates are not allowed, because they are converted to unsigned integers for bitwise operations.
+	//   -256.0f <= u, -256.0f <= v
+	//   Negative texture coordinates may not go below -256, or else they will be stretched out on ARM NEON.
 	template<
 	  bool SQUARE = false,
 	  bool SINGLE_LAYER = false,
@@ -236,8 +236,10 @@ namespace dsr {
 			scaleU = scaleU >> mipLevel;
 			scaleV = scaleV >> mipLevel;
 		}
-		auto xPixel = truncateToU32(u * floatFromU32(scaleU));
-		auto yPixel = truncateToU32(v * floatFromU32(scaleV));
+		// A constant offset applied to texture coordinates to allow using negative coordinates.
+		static const float wrapOffset = 256.0f;
+		auto xPixel = truncateToU32((u + wrapOffset) * floatFromU32(scaleU));
+		auto yPixel = truncateToU32((v + wrapOffset) * floatFromU32(scaleV));
 		return texture_readPixel<SQUARE, SINGLE_LAYER, false, MIP_INSIDE, HIGHEST_RESOLUTION>(texture, xPixel, yPixel, mipLevel);
 	}
 
@@ -323,6 +325,9 @@ namespace dsr {
 		return weightColors(weightColors(colorA, weightXL, colorB, weightXR), weightYT, weightColors(colorC, weightXL, colorD, weightXR), weightYB);
 	}
 
+	// Pre-condition:
+	//   -256.0f <= u, -256.0f <= v
+	//   Negative texture coordinates may not go below -256, or else they will be stretched out on ARM NEON.
 	template<
 	  bool SQUARE = false,
 	  bool SINGLE_LAYER = false,
@@ -341,10 +346,12 @@ namespace dsr {
 			scaleU = scaleU >> mipLevel;
 			scaleV = scaleV >> mipLevel;
 		}
+		// A constant offset applied to texture coordinates to allow using negative coordinates.
+		static const float wrapOffset = 256.0f;
 		// Convert from the normalized 0..1 scale to a 0..size*256 scale for 8 bits of sub-pixel precision.
 		//   Half a pixel is subtracted so that the seam between bi-linear patches ends up at the center of texels.
-		auto subCenterX = truncateToU32(u * floatFromU32(scaleU)) - 128u;
-		auto subCenterY = truncateToU32(v * floatFromU32(scaleV)) - 128u;
+		auto subCenterX = truncateToU32((u + wrapOffset) * floatFromU32(scaleU)) - 128u;
+		auto subCenterY = truncateToU32((v + wrapOffset) * floatFromU32(scaleV)) - 128u;
 		// Get the remainders as interpolation weights.
 		auto weightX = subCenterX & 0xFF;
 		auto weightY = subCenterY & 0xFF;

+ 11 - 3
Source/test/tests/SimdTest.cpp

@@ -3,9 +3,7 @@
 #include "../../DFPSR/base/simd.h"
 #include "../../DFPSR/base/endian.h"
 
-// TODO: Test: allLanesNotEqual, allLanesLesser, allLanesGreater, allLanesLesserOrEqual, allLanesGreaterOrEqual, operand ~, smaller bit shifts.
-// TODO: Test that truncateToU32 saturates to minimum and maximum values.
-// TODO: Test that truncateToI32 saturates to minimum and maximum values.
+// TODO: Test: allLanesNotEqual, allLanesLesser, allLanesGreater, allLanesLesserOrEqual, allLanesGreaterOrEqual, operand ~.
 // TODO: Set up a test where SIMD is disabled to force using the reference implementation.
 // TODO: Keep the reference implementation alongside the SIMD types during brute-force testing with millions of random inputs.
 
@@ -902,6 +900,16 @@ START_TEST(Simd)
 	ASSERT_EQUAL_SIMD(clampUpper(F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f), F32x8(1.5f)), F32x8(-35.1f, 1.0f, 1.5f, 1.5f, 0.0f, -1.0f, 1.5f, -1.9f));
 	ASSERT_EQUAL_SIMD(clampLower(F32x8(-1.5f), F32x8(-35.1f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.9f)), F32x8(-1.5f, 1.0f, 2.0f, 45.7f, 0.0f, -1.0f, 2.1f, -1.5f));
 
+	// Float to integer conversions
+	// Underflow and overflow are undefined behavior, because NEON will clamp out-of-bounds values while SSE will truncate away higher bits.
+	ASSERT_EQUAL_SIMD(truncateToU32(F32x4(0.01f, 0.99f, 1.01f, 1.99f)),U32x4(0, 0, 1, 1));
+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(0.01f, 0.99f, 1.01f, 1.99f)),I32x4(0, 0, 1, 1));
+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(-0.01f, -0.99f, -1.01f, -1.99f)),I32x4(0, 0, -1, -1));
+	ASSERT_EQUAL_SIMD(truncateToU32(F32x4(0.1f, 5.4f, 2.6f, 4.9f)),U32x4(0, 5, 2, 4));
+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(0.1f, 5.4f, 2.6f, 4.9f)),I32x4(0, 5, 2, 4));
+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(-1.1f, -0.9f, -0.1f, 0.1f)),I32x4(-1, 0, 0, 0));
+	ASSERT_EQUAL_SIMD(truncateToI32(F32x4(-1000.9f, -23.4f, 123456.7f, 846.999f)),I32x4(-1000, -23, 123456, 846));
+
 	// F32x4 operations
 	ASSERT_EQUAL_SIMD(F32x4(1.1f, -2.2f, 3.3f, 4.0f) + F32x4(2.2f, -4.4f, 6.6f, 8.0f), F32x4(3.3f, -6.6f, 9.9f, 12.0f));
 	ASSERT_EQUAL_SIMD(F32x4(-1.5f, -0.5f, 0.5f, 1.5f) + 1.0f, F32x4(-0.5f, 0.5f, 1.5f, 2.5f));

+ 35 - 0
Source/test/tests/TextureTest.cpp

@@ -455,6 +455,38 @@ START_TEST(Texture)
 		ASSERT_EQUAL(texture_sample_nearest(texture, 0.75f, 0.25f, 1u), 1101u);
 		ASSERT_EQUAL(texture_sample_nearest(texture, 0.25f, 0.75f, 1u), 1011u);
 		ASSERT_EQUAL(texture_sample_nearest(texture, 0.75f, 0.75f, 1u), 1111u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f, 0.01f, 1u), 1001u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f, 0.49f, 1u), 1001u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f, 0.01f, 1u), 1001u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f, 0.49f, 1u), 1001u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f, 0.01f, 1u), 1101u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f, 0.49f, 1u), 1101u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f, 0.01f, 1u), 1101u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f, 0.49f, 1u), 1101u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f, 0.51f, 1u), 1011u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f, 0.99f, 1u), 1011u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f, 0.51f, 1u), 1011u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f, 0.99f, 1u), 1011u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f, 0.51f, 1u), 1111u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f, 0.99f, 1u), 1111u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f, 0.51f, 1u), 1111u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f, 0.99f, 1u), 1111u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f - 256.0f, 0.01f - 256.0f, 1u), 1001u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f - 256.0f, 0.49f - 256.0f, 1u), 1001u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f - 256.0f, 0.01f - 256.0f, 1u), 1001u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f - 256.0f, 0.49f - 256.0f, 1u), 1001u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f - 256.0f, 0.01f - 256.0f, 1u), 1101u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f - 256.0f, 0.49f - 256.0f, 1u), 1101u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f - 256.0f, 0.01f - 256.0f, 1u), 1101u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f - 256.0f, 0.49f - 256.0f, 1u), 1101u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f - 256.0f, 0.51f - 256.0f, 1u), 1011u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.01f - 256.0f, 0.99f - 256.0f, 1u), 1011u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f - 256.0f, 0.51f - 256.0f, 1u), 1011u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.49f - 256.0f, 0.99f - 256.0f, 1u), 1011u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f - 256.0f, 0.51f - 256.0f, 1u), 1111u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.51f - 256.0f, 0.99f - 256.0f, 1u), 1111u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f - 256.0f, 0.51f - 256.0f, 1u), 1111u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, 0.99f - 256.0f, 0.99f - 256.0f, 1u), 1111u);
 		ASSERT_EQUAL(texture_sample_nearest(texture, 0.5f / 4.0f, 0.5f / 4.0f, 0u), 1002u);
 		ASSERT_EQUAL(texture_sample_nearest(texture, 1.5f / 4.0f, 0.5f / 4.0f, 0u), 1102u);
 		ASSERT_EQUAL(texture_sample_nearest(texture, 2.5f / 4.0f, 0.5f / 4.0f, 0u), 1202u);
@@ -474,6 +506,9 @@ START_TEST(Texture)
 		ASSERT_EQUAL(texture_sample_nearest(texture, -53.0f, -17.0f,  2u), 1000u);
 		ASSERT_EQUAL(texture_sample_nearest(texture, -53.0f, -17.0f,  3u), 1000u);
 		ASSERT_EQUAL(texture_sample_nearest(texture, -53.0f, -17.0f, 15u), 1000u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, -255.7f, -255.7f, 0u), 1112u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, -100.7f, -64.7f, 0u), 1112u);
+		ASSERT_EQUAL(texture_sample_nearest(texture, -84.7f, 0.3f, 0u), 1112u);
 		// TODO: Test the optimization template flags.
 	}
 		// TODO: Test reading pixels from SafePointer with and without a specified row index.